# libraries
import os
import sys
import csv
import json
from os import listdir
from datetime import datetime

import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split, RepeatedKFold
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import (accuracy_score, confusion_matrix,
                             mean_squared_error, mean_absolute_error, r2_score)
from sklearn import tree
from sklearn.tree import export_graphviz

import tensorflow as tf
import keras.backend as K
from keras import regularizers, optimizers
from keras.models import Sequential, load_model
from keras.layers import Dense, LSTM, RNN

from six import StringIO
from IPython.display import Image
import pydotplus
np.random.seed(2018)
2023-08-23 13:08:02.770155: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2023-08-23 13:08:03.655052: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2023-08-23 13:08:03.661031: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-08-23 13:08:06.240519: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT
import pymysql

# establish the connection to the MySQL database
host = "192.168.88.187"
port = "3306"
user = "backblaze"
password = "Testing.2023"
database = "backblaze_ml_full"
conn = pymysql.connect(
    host=host,
    port=int(port),
    user=user,
    passwd=password,
    db=database,
    charset='utf8mb4')
query = """
    select date, serial_number, model, capacity_bytes, days_to_failure, failure,
           smart_1_normalized, smart_3_normalized, smart_5_normalized, smart_7_normalized,
           smart_9_normalized, smart_187_normalized, smart_189_normalized,
           smart_194_normalized, smart_197_normalized
    from drive_stats
    where date >= '2014-03-01'
      and serial_number in (select distinct(serial_number) from drive_stats
                            where failure = 1 and date >= '2014-03-01')
"""
sqldf = pd.read_sql_query(query, conn)
sqldf
/tmp/ipykernel_2225929/1261091465.py:1: UserWarning: pandas only supports SQLAlchemy connectable (engine/connection) or database string URI or sqlite3 DBAPI2 connection. Other DBAPI2 objects are not tested. Please consider using SQLAlchemy.
date | serial_number | model | capacity_bytes | days_to_failure | failure | smart_1_normalized | smart_3_normalized | smart_5_normalized | smart_7_normalized | smart_9_normalized | smart_187_normalized | smart_189_normalized | smart_194_normalized | smart_197_normalized | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 2014-03-01 | MJ1311YNG36USA | Hitachi HDS5C3030ALA630 | 3000592982016 | 991 | 0 | 100.0 | 138.0 | 100.0 | 100.0 | 98.0 | NaN | NaN | 253.0 | 100.0 |
1 | 2014-03-01 | MJ1311YNG733NA | Hitachi HDS5C3030ALA630 | 3000592982016 | 840 | 0 | 100.0 | 100.0 | 100.0 | 100.0 | 98.0 | NaN | NaN | 250.0 | 100.0 |
2 | 2014-03-01 | W3009AX6 | ST4000DM000 | 4000787030016 | 54 | 0 | 119.0 | 91.0 | 100.0 | 87.0 | 93.0 | 100.0 | 99.0 | 26.0 | 100.0 |
3 | 2014-03-01 | WD-WCAV5M690585 | WDC WD10EADS | 1000204886016 | 409 | 0 | 200.0 | 191.0 | 200.0 | 100.0 | 68.0 | NaN | NaN | 127.0 | 200.0 |
4 | 2014-03-01 | S1F0CSW2 | ST3000DM001 | 3000592982016 | 229 | 0 | 114.0 | 92.0 | 100.0 | 89.0 | 84.0 | 100.0 | 100.0 | 23.0 | 100.0 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
14769522 | 2023-03-31 | 7LZ01G30 | Seagate BarraCuda SSD ZA250CM10002 | 250059350016 | 0 | 0 | 100.0 | NaN | NaN | NaN | 100.0 | NaN | NaN | 83.0 | NaN |
14769523 | 2023-03-31 | 9JG4657T | WDC WUH721414ALE6L4 | 14000519643136 | 0 | 0 | 100.0 | 85.0 | 100.0 | 100.0 | 98.0 | NaN | NaN | 55.0 | 100.0 |
14769524 | 2023-03-31 | 6090A00RFVKG | TOSHIBA MG08ACA16TA | 16000900661248 | 0 | 0 | 100.0 | 100.0 | 100.0 | 100.0 | 87.0 | NaN | NaN | 100.0 | 100.0 |
14769525 | 2023-03-31 | 51R0A2Q8FVGG | TOSHIBA MG08ACA16TE | 16000900661248 | 0 | 0 | 100.0 | 100.0 | 100.0 | 100.0 | 70.0 | NaN | NaN | 100.0 | 100.0 |
14769526 | 2023-03-31 | 7QT032NR | Seagate BarraCuda 120 SSD ZA250CM10003 | 250059350016 | 0 | 0 | 100.0 | NaN | NaN | NaN | 100.0 | NaN | NaN | 96.0 | NaN |
14769527 rows × 15 columns
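The UserWarning above can be avoided by handing pandas an SQLAlchemy engine instead of a raw PyMySQL connection. A minimal sketch, assuming SQLAlchemy is installed and reusing the credentials and query string defined above:
# Hedged alternative: let pandas talk to MySQL through an SQLAlchemy engine
from sqlalchemy import create_engine
engine = create_engine(
    "mysql+pymysql://%s:%s@%s:%s/%s?charset=utf8mb4" % (user, password, host, port, database))
sqldf = pd.read_sql_query(query, engine)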
def computeDay(group):
    group = group.sort_values('date')  # sort by date, oldest first
    # count down: the last reported day for the drive gets DayToFailure = 0
    group['DayToFailure'] = list(range(group.shape[0] - 1, -1, -1))
    return group
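A tiny, hypothetical example (not from the dataset) illustrates what computeDay produces: the most recent observation of a drive gets DayToFailure = 0 and earlier days count upward.
# Hypothetical three-day group for a single drive
example = pd.DataFrame({'date': pd.to_datetime(['2023-01-03', '2023-01-01', '2023-01-02'])})
print(computeDay(example))
# After sorting by date, the DayToFailure column reads 2, 1, 0.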
# override the series_to_supervised method to work without classes
def series_to_supervised(data, n_in=1, n_out=1, dropnan=True):
    n_vars = data.shape[1]
    cols, names = list(), list()
    dataclass = data[data.columns[-1:]]  # keep the target column (DayToFailure)
    data = data.drop(columns=['serial_number', 'DayToFailure'], axis=1)
    columns = data.columns
    # input sequence (t-n, ..., t-1) -- stops before the current observation
    for i in range(n_in - 1, 0, -1):
        cols.append(data.shift(i))
        names += [(element + '(t-%d)' % (i)) for element in columns]
    # forecast sequence (t, t+1, ..., t+n)
    for i in range(0, n_out):
        cols.append(data.shift(-i))
        if i == 0:
            names += [(element + '(t)') for element in columns]
        else:
            names += [(element + '(t+%d)' % (i)) for element in columns]
    cols.append(dataclass)  # re-append the target column
    names += ['DayToFailure']
    agg = pd.concat(cols, axis=1)
    agg.columns = names
    # drop the rows with NaN values introduced by the shifting
    if dropnan:
        agg.dropna(inplace=True)
    return agg
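As a hedged illustration of the windowing, a toy frame with one drive, five days and a single SMART feature (hypothetical values) is flattened as follows:
# Toy input: the function drops serial_number and DayToFailure from the features
# and re-appends DayToFailure as the target.
toy = pd.DataFrame({'serial_number': ['X'] * 5,
                    'smart_1_normalized': [0.1, 0.2, 0.3, 0.4, 0.5],
                    'DayToFailure': [4, 3, 2, 1, 0]})
print(series_to_supervised(toy, n_in=3, n_out=1))
# -> 3 rows with columns smart_1_normalized(t-2), smart_1_normalized(t-1),
#    smart_1_normalized(t) and DayToFailure; the first two rows are dropped as NaN.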
#Preprocessing
df = sqldf.copy()
df = df.drop('model', axis=1)
df = df.drop('capacity_bytes', axis=1)
df.date = pd.to_datetime(df.date, format='%Y-%m-%d').dt.date
scaler = MinMaxScaler(feature_range = (-1,1))
df[['smart_1_normalized', 'smart_3_normalized', 'smart_5_normalized', 'smart_7_normalized',
'smart_9_normalized', 'smart_187_normalized', 'smart_189_normalized', 'smart_194_normalized',
'smart_197_normalized']] = scaler.fit_transform(df[['smart_1_normalized', 'smart_3_normalized',
'smart_5_normalized', 'smart_7_normalized', 'smart_9_normalized', 'smart_187_normalized',
'smart_189_normalized', 'smart_194_normalized', 'smart_197_normalized']])
df
date | serial_number | days_to_failure | failure | smart_1_normalized | smart_3_normalized | smart_5_normalized | smart_7_normalized | smart_9_normalized | smart_187_normalized | smart_189_normalized | smart_194_normalized | smart_197_normalized | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 2014-03-01 | MJ1311YNG36USA | 991 | 0 | -0.005025 | -0.256831 | -0.211155 | -0.211155 | -0.227092 | NaN | NaN | 1.000000 | -0.211155 |
1 | 2014-03-01 | MJ1311YNG733NA | 840 | 0 | -0.005025 | -0.672131 | -0.211155 | -0.211155 | -0.227092 | NaN | NaN | 0.975104 | -0.211155 |
2 | 2014-03-01 | W3009AX6 | 54 | 0 | 0.185930 | -0.770492 | -0.211155 | -0.314741 | -0.266932 | 1.0 | 0.979798 | -0.883817 | -0.211155 |
3 | 2014-03-01 | WD-WCAV5M690585 | 409 | 0 | 1.000000 | 0.322404 | 0.585657 | -0.211155 | -0.466135 | NaN | NaN | -0.045643 | 0.585657 |
4 | 2014-03-01 | S1F0CSW2 | 229 | 0 | 0.135678 | -0.759563 | -0.211155 | -0.298805 | -0.338645 | 1.0 | 1.000000 | -0.908714 | -0.211155 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
14769522 | 2023-03-31 | 7LZ01G30 | 0 | 0 | -0.005025 | NaN | NaN | NaN | -0.211155 | NaN | NaN | -0.410788 | NaN |
14769523 | 2023-03-31 | 9JG4657T | 0 | 0 | -0.005025 | -0.836066 | -0.211155 | -0.211155 | -0.227092 | NaN | NaN | -0.643154 | -0.211155 |
14769524 | 2023-03-31 | 6090A00RFVKG | 0 | 0 | -0.005025 | -0.672131 | -0.211155 | -0.211155 | -0.314741 | NaN | NaN | -0.269710 | -0.211155 |
14769525 | 2023-03-31 | 51R0A2Q8FVGG | 0 | 0 | -0.005025 | -0.672131 | -0.211155 | -0.211155 | -0.450199 | NaN | NaN | -0.269710 | -0.211155 |
14769526 | 2023-03-31 | 7QT032NR | 0 | 0 | -0.005025 | NaN | NaN | NaN | -0.211155 | NaN | NaN | -0.302905 | NaN |
14769527 rows × 13 columns
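One caveat worth flagging: the MinMaxScaler above is fit on the full dataset before any train/validation/test split, so the scaling statistics also see the evaluation data. A hedged sketch of the stricter variant (train_rows is an illustrative boolean mask, not a variable from this notebook) fits the scaler on training rows only and reuses it everywhere else:
smart_cols = ['smart_1_normalized', 'smart_3_normalized', 'smart_5_normalized',
              'smart_7_normalized', 'smart_9_normalized', 'smart_187_normalized',
              'smart_189_normalized', 'smart_194_normalized', 'smart_197_normalized']
scaler = MinMaxScaler(feature_range=(-1, 1))
# fit only on the training rows, then apply the same transform to the rest
df.loc[train_rows, smart_cols] = scaler.fit_transform(df.loc[train_rows, smart_cols])
df.loc[~train_rows, smart_cols] = scaler.transform(df.loc[~train_rows, smart_cols])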
# Clean up garbage entries: some drives report a failure on a given date and then
# keep reporting measurements after that date -- those later rows need to be dropped.
test = df.copy()
#test
test2 = pd.DataFrame({'serial_number': test.loc[test['failure'] == 1]['serial_number'],
                      'failure_date': test.loc[test['failure'] == 1]['date']})
#test2
test3 = test.join(test2.set_index('serial_number'), on='serial_number')
#test3
clean = test3.drop(test3[test3['date'] > test3['failure_date']].index)
clean = clean.drop('failure_date', axis=1)
clean
date | serial_number | days_to_failure | failure | smart_1_normalized | smart_3_normalized | smart_5_normalized | smart_7_normalized | smart_9_normalized | smart_187_normalized | smart_189_normalized | smart_194_normalized | smart_197_normalized | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 2014-03-01 | MJ1311YNG36USA | 991 | 0 | -0.005025 | -0.256831 | -0.211155 | -0.211155 | -0.227092 | NaN | NaN | 1.000000 | -0.211155 |
1 | 2014-03-01 | MJ1311YNG733NA | 840 | 0 | -0.005025 | -0.672131 | -0.211155 | -0.211155 | -0.227092 | NaN | NaN | 0.975104 | -0.211155 |
2 | 2014-03-01 | W3009AX6 | 54 | 0 | 0.185930 | -0.770492 | -0.211155 | -0.314741 | -0.266932 | 1.000000 | 0.979798 | -0.883817 | -0.211155 |
3 | 2014-03-01 | WD-WCAV5M690585 | 409 | 0 | 1.000000 | 0.322404 | 0.585657 | -0.211155 | -0.466135 | NaN | NaN | -0.045643 | 0.585657 |
4 | 2014-03-01 | S1F0CSW2 | 229 | 0 | 0.135678 | -0.759563 | -0.211155 | -0.298805 | -0.338645 | 1.000000 | 1.000000 | -0.908714 | -0.211155 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
14769333 | 2023-03-30 | 2AGMNB7Y | 0 | 1 | -0.005025 | -0.672131 | -0.211155 | -0.211155 | -0.243028 | NaN | NaN | 0.078838 | -0.211155 |
14769335 | 2023-03-30 | 8HH0KRGH | 0 | 1 | -0.246231 | -0.672131 | -0.211155 | -0.211155 | -0.235060 | NaN | NaN | 0.278008 | -0.211155 |
14769341 | 2023-03-30 | ZLW16KEQ | 0 | 1 | -0.226131 | -0.781421 | -0.211155 | -0.362550 | -0.402390 | 0.979798 | NaN | -0.809129 | -0.211155 |
14769343 | 2023-03-30 | X0GE5KSC | 0 | 1 | -0.005025 | -0.737705 | -0.211155 | -0.211155 | -0.235060 | NaN | NaN | -0.551867 | -0.211155 |
14769346 | 2023-03-30 | 61B0A03NF97G | 0 | 1 | -0.005025 | -0.672131 | -0.211155 | -0.211155 | -0.498008 | NaN | NaN | -0.269710 | -0.211155 |
14442321 rows × 13 columns
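An optional sanity check (a sketch, reusing test2 from above) confirms that no surviving row is dated after its drive's failure date:
# Re-join the failure dates and assert the cleanup removed every post-failure row
check = clean.join(test2.set_index('serial_number'), on='serial_number')
assert (check['date'] > check['failure_date']).sum() == 0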
df=clean.copy()
df = df.drop(columns= ['days_to_failure'], axis = 1)
df
date | serial_number | failure | smart_1_normalized | smart_3_normalized | smart_5_normalized | smart_7_normalized | smart_9_normalized | smart_187_normalized | smart_189_normalized | smart_194_normalized | smart_197_normalized | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 2014-03-01 | MJ1311YNG36USA | 0 | -0.005025 | -0.256831 | -0.211155 | -0.211155 | -0.227092 | NaN | NaN | 1.000000 | -0.211155 |
1 | 2014-03-01 | MJ1311YNG733NA | 0 | -0.005025 | -0.672131 | -0.211155 | -0.211155 | -0.227092 | NaN | NaN | 0.975104 | -0.211155 |
2 | 2014-03-01 | W3009AX6 | 0 | 0.185930 | -0.770492 | -0.211155 | -0.314741 | -0.266932 | 1.000000 | 0.979798 | -0.883817 | -0.211155 |
3 | 2014-03-01 | WD-WCAV5M690585 | 0 | 1.000000 | 0.322404 | 0.585657 | -0.211155 | -0.466135 | NaN | NaN | -0.045643 | 0.585657 |
4 | 2014-03-01 | S1F0CSW2 | 0 | 0.135678 | -0.759563 | -0.211155 | -0.298805 | -0.338645 | 1.000000 | 1.000000 | -0.908714 | -0.211155 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
14769333 | 2023-03-30 | 2AGMNB7Y | 1 | -0.005025 | -0.672131 | -0.211155 | -0.211155 | -0.243028 | NaN | NaN | 0.078838 | -0.211155 |
14769335 | 2023-03-30 | 8HH0KRGH | 1 | -0.246231 | -0.672131 | -0.211155 | -0.211155 | -0.235060 | NaN | NaN | 0.278008 | -0.211155 |
14769341 | 2023-03-30 | ZLW16KEQ | 1 | -0.226131 | -0.781421 | -0.211155 | -0.362550 | -0.402390 | 0.979798 | NaN | -0.809129 | -0.211155 |
14769343 | 2023-03-30 | X0GE5KSC | 1 | -0.005025 | -0.737705 | -0.211155 | -0.211155 | -0.235060 | NaN | NaN | -0.551867 | -0.211155 |
14769346 | 2023-03-30 | 61B0A03NF97G | 1 | -0.005025 | -0.672131 | -0.211155 | -0.211155 | -0.498008 | NaN | NaN | -0.269710 | -0.211155 |
14442321 rows × 12 columns
dfHour = df.groupby(['serial_number']).apply(computeDay)
dfHour = dfHour[dfHour.DayToFailure <= 120]
dfHour = dfHour.drop(columns = ['date'])
dfHour= dfHour.drop(columns= ['failure','serial_number'], axis=1)
dfHour=dfHour.reset_index()
dfHour= dfHour.drop(columns= ['level_1'], axis=1)
window=90
print('Creating the sequence')
dfHourSequence = dfHour.groupby(['serial_number']).apply(series_to_supervised, n_in=window, n_out=1, dropnan=True)
dfHourSequence
Creating the sequence
smart_1_normalized(t-89) | smart_3_normalized(t-89) | smart_5_normalized(t-89) | smart_7_normalized(t-89) | smart_9_normalized(t-89) | smart_187_normalized(t-89) | smart_189_normalized(t-89) | smart_194_normalized(t-89) | smart_197_normalized(t-89) | smart_1_normalized(t-88) | ... | smart_1_normalized(t) | smart_3_normalized(t) | smart_5_normalized(t) | smart_7_normalized(t) | smart_9_normalized(t) | smart_187_normalized(t) | smart_189_normalized(t) | smart_194_normalized(t) | smart_197_normalized(t) | DayToFailure | ||
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
serial_number | ||||||||||||||||||||||
5VML01P0 | 107293 | 0.175879 | -0.704918 | -0.211155 | -0.314741 | -0.577689 | 1.0 | 1.0 | -0.883817 | -0.211155 | 0.135678 | ... | 0.165829 | -0.704918 | -0.211155 | -0.314741 | -0.601594 | 1.000000 | 1.0 | -0.875519 | -0.211155 | 31 |
107294 | 0.135678 | -0.704918 | -0.211155 | -0.314741 | -0.577689 | 1.0 | 1.0 | -0.883817 | -0.211155 | 0.185930 | ... | 0.105528 | -0.704918 | -0.211155 | -0.314741 | -0.601594 | 1.000000 | 1.0 | -0.875519 | -0.211155 | 30 | |
107295 | 0.185930 | -0.704918 | -0.211155 | -0.314741 | -0.577689 | 1.0 | 1.0 | -0.883817 | -0.211155 | 0.155779 | ... | 0.175879 | -0.704918 | -0.211155 | -0.314741 | -0.601594 | 1.000000 | 1.0 | -0.875519 | -0.211155 | 29 | |
107296 | 0.155779 | -0.704918 | -0.211155 | -0.314741 | -0.577689 | 1.0 | 1.0 | -0.883817 | -0.211155 | 0.195980 | ... | 0.135678 | -0.704918 | -0.211155 | -0.314741 | -0.601594 | 1.000000 | 1.0 | -0.875519 | -0.211155 | 28 | |
107297 | 0.195980 | -0.704918 | -0.211155 | -0.314741 | -0.577689 | 1.0 | 1.0 | -0.875519 | -0.211155 | 0.165829 | ... | 0.175879 | -0.704918 | -0.211155 | -0.314741 | -0.601594 | 1.000000 | 1.0 | -0.875519 | -0.211155 | 27 | |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
ZTT3STWF | 1987895 | -0.206030 | -0.693989 | -0.211155 | -0.338645 | -0.258964 | 1.0 | 1.0 | -0.941909 | -0.211155 | -0.206030 | ... | -0.507538 | -0.693989 | -0.211155 | -0.330677 | -0.274900 | 0.414141 | 1.0 | -0.950207 | -0.211155 | 4 |
1987896 | -0.206030 | -0.693989 | -0.211155 | -0.338645 | -0.258964 | 1.0 | 1.0 | -0.941909 | -0.211155 | -0.236181 | ... | -0.507538 | -0.693989 | -0.211155 | -0.330677 | -0.274900 | 0.414141 | 1.0 | -0.950207 | -0.211155 | 3 | |
1987897 | -0.236181 | -0.693989 | -0.211155 | -0.338645 | -0.258964 | 1.0 | 1.0 | -0.933610 | -0.211155 | -0.216080 | ... | -0.507538 | -0.693989 | -0.211155 | -0.330677 | -0.274900 | 0.414141 | 1.0 | -0.950207 | -0.211155 | 2 | |
1987898 | -0.216080 | -0.693989 | -0.211155 | -0.338645 | -0.258964 | 1.0 | 1.0 | -0.941909 | -0.211155 | -0.206030 | ... | -0.507538 | -0.693989 | -0.211155 | -0.330677 | -0.274900 | 0.414141 | 1.0 | -0.950207 | -0.211155 | 1 | |
1987899 | -0.206030 | -0.693989 | -0.211155 | -0.338645 | -0.258964 | 1.0 | 1.0 | -0.941909 | -0.211155 | -0.185930 | ... | -0.507538 | -0.693989 | -0.211155 | -0.330677 | -0.274900 | 0.414141 | 1.0 | -0.950207 | -0.211155 | 0 |
277551 rows × 811 columns
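The 811 columns decompose as 90 timesteps × 9 SMART attributes plus the DayToFailure target, which a one-line check makes explicit:
# 90 lagged copies of the 9 SMART features, plus one target column
assert dfHourSequence.shape[1] == window * 9 + 1   # 90 * 9 + 1 = 811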
print('Dividing into train test')
X_train, X_rim, y_train, y_rim = train_test_split(dfHourSequence[dfHourSequence.columns[:-1]],
dfHourSequence[dfHourSequence.columns[-1:]] ,
stratify=dfHourSequence[dfHourSequence.columns[-1:]],
test_size=0.30)
Dividing into train test
print(y_train)
print(y_train.columns)
                        DayToFailure
serial_number
W300T3EK  687360                   6
Z305DHV4  1231919                 11
ZA17ZFEW  1446046                 11
Z302SWXT  1021026                  5
S301KQQX  554027                  26
...                              ...
Z304TK95  1166742                  6
Z305FNVM  1240279                  0
Z303N1QB  1052406                 10
W300R8BD  683889                   3
Z302B11W  999139                  29

[194285 rows x 1 columns]
Index(['DayToFailure'], dtype='object')
X_val, X_test, y_val, y_test = train_test_split(X_rim, y_rim, stratify=y_rim, test_size=0.50)
# Re-attach the target as a column on each split; the empty-frame concat just adds a
# DayToFailure column, and sort=True also reorders all columns alphabetically.
X_train = pd.concat([X_train, pd.DataFrame(columns=['DayToFailure'])], sort=True)
X_val = pd.concat([X_val, pd.DataFrame(columns=['DayToFailure'])], sort=True)
X_test = pd.concat([X_test, pd.DataFrame(columns=['DayToFailure'])], sort=True)
X_train[['DayToFailure']] = y_train.values
X_val[['DayToFailure']] = y_val.values
X_test[['DayToFailure']] = y_test.values
X_train
#X_val
#X_test
DayToFailure | smart_187_normalized(t) | smart_187_normalized(t-1) | smart_187_normalized(t-10) | smart_187_normalized(t-11) | smart_187_normalized(t-12) | smart_187_normalized(t-13) | smart_187_normalized(t-14) | smart_187_normalized(t-15) | smart_187_normalized(t-16) | ... | smart_9_normalized(t-81) | smart_9_normalized(t-82) | smart_9_normalized(t-83) | smart_9_normalized(t-84) | smart_9_normalized(t-85) | smart_9_normalized(t-86) | smart_9_normalized(t-87) | smart_9_normalized(t-88) | smart_9_normalized(t-89) | smart_9_normalized(t-9) | ||
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
W300T3EK | 687360 | 6 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | ... | -0.219124 | -0.219124 | -0.219124 | -0.219124 | -0.219124 | -0.219124 | -0.219124 | -0.219124 | -0.219124 | -0.235060 |
Z305DHV4 | 1231919 | 11 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | ... | -0.458167 | -0.458167 | -0.458167 | -0.458167 | -0.458167 | -0.458167 | -0.458167 | -0.458167 | -0.458167 | -0.474104 |
ZA17ZFEW | 1446046 | 11 | -0.878788 | -0.878788 | -0.878788 | -0.878788 | -0.878788 | -0.878788 | -0.878788 | -0.878788 | -0.878788 | ... | -0.617530 | -0.617530 | -0.617530 | -0.617530 | -0.617530 | -0.617530 | -0.617530 | -0.617530 | -0.617530 | -0.633466 |
Z302SWXT | 1021026 | 5 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | ... | -0.314741 | -0.314741 | -0.314741 | -0.314741 | -0.314741 | -0.314741 | -0.314741 | -0.314741 | -0.314741 | -0.330677 |
S301KQQX | 554027 | 26 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | ... | -0.314741 | -0.314741 | -0.314741 | -0.314741 | -0.314741 | -0.314741 | -0.314741 | -0.314741 | -0.314741 | -0.330677 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
Z304TK95 | 1166742 | 6 | 0.252525 | 0.252525 | 0.252525 | 0.252525 | 0.252525 | 0.252525 | 0.252525 | 0.252525 | 0.272727 | ... | -0.442231 | -0.442231 | -0.442231 | -0.442231 | -0.442231 | -0.442231 | -0.442231 | -0.442231 | -0.442231 | -0.458167 |
Z305FNVM | 1240279 | 0 | -0.131313 | 0.272727 | 0.454545 | 0.575758 | 0.575758 | 0.575758 | 0.575758 | 0.575758 | 0.575758 | ... | -0.593625 | -0.593625 | -0.593625 | -0.593625 | -0.593625 | -0.593625 | -0.585657 | -0.585657 | -0.585657 | -0.609562 |
Z303N1QB | 1052406 | 10 | 0.797980 | 0.797980 | 0.797980 | 0.797980 | 0.797980 | 0.797980 | 0.797980 | 0.797980 | 0.797980 | ... | -0.466135 | -0.466135 | -0.466135 | -0.466135 | -0.466135 | -0.466135 | -0.466135 | -0.466135 | -0.466135 | -0.482072 |
W300R8BD | 683889 | 3 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | ... | -0.426295 | -0.426295 | -0.426295 | -0.426295 | -0.426295 | -0.426295 | -0.426295 | -0.418327 | -0.418327 | -0.442231 |
Z302B11W | 999139 | 29 | 0.878788 | 0.878788 | 0.878788 | 0.878788 | 0.878788 | 0.878788 | 0.878788 | 0.878788 | 0.878788 | ... | -0.458167 | -0.458167 | -0.458167 | -0.458167 | -0.458167 | -0.458167 | -0.458167 | -0.458167 | -0.458167 | -0.474104 |
194285 rows × 811 columns
X_train.shape
(194285, 811)
X_val.shape
(41633, 811)
Xtrain = X_train.copy()
Xtrain = Xtrain.drop(columns=['DayToFailure'], axis=1 )
Xtrain.shape
#Xtrain
(194285, 810)
Xval = X_val.copy()
Xval = Xval.drop(columns=['DayToFailure'], axis=1 )
Xval.shape
(41633, 810)
yTest = X_test[['DayToFailure']].values
#yTest
Xtest = X_test.drop(columns=['DayToFailure'], axis=1 )
#Xtest
# reshape each flattened row back into a (window, features) sequence
Xtrain = Xtrain.values.reshape(Xtrain.shape[0], window, int(Xtrain.shape[1]/window))
Xval = Xval.values.reshape(Xval.shape[0], window, int(Xval.shape[1]/window))
Xtest= Xtest.values.reshape(Xtest.shape[0], window, int(Xtest.shape[1]/window))
ytrain = X_train[['DayToFailure']].values
yVal = X_val[['DayToFailure']].values
print(Xtrain.shape)
print(Xval.shape)
print(Xtest.shape)
print(ytrain.shape)
print(yVal.shape)
print(yTest.shape)
(194285, 90, 9)
(41633, 90, 9)
(41633, 90, 9)
(194285, 1)
(41633, 1)
(41633, 1)
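Because the empty-frame concat above was done with sort=True, the feature columns are now in alphabetical order (visible in the X_train preview, where smart_187_normalized(t) is followed by (t-1), (t-10), ...). The reshape treats every 9 consecutive columns as one timestep, so a quick check of the grouping it actually produces can be worthwhile before training:
# Sketch: show which 9 columns end up in each of the first few "timesteps" of the reshape
cols = X_train.drop(columns=['DayToFailure']).columns
for t in range(3):
    print('timestep %d:' % t, list(cols[t * 9:(t + 1) * 9]))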
from tensorflow.keras.layers import Bidirectional
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import Dropout
import keras
#same as experiment6 - BiLSTM
def build_model():
dp_lvl = 0.1
model = Sequential()
model.add(Bidirectional(LSTM(128, input_shape=(window, 9), return_sequences = True, activation = "tanh" )))
model.add(Bidirectional(LSTM(64, return_sequences = True, activation = "tanh" )))
model.add(Bidirectional(LSTM(32, activation="tanh")))
model.add(Dense(96, activation='relu'))
model.add(Dense(128, activation='relu'))
model.add(Dense(1))
return model
# same training setup as experiment 6
num_epochs = 150
historyvet = []
model = build_model()
best_acc = 0.0
#adam = optimizers.Adam(lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=None, decay=0, amsgrad=False)
adam = tf.keras.optimizers.legacy.Adam(learning_rate=0.001, beta_1=0.9, beta_2=0.999, epsilon=None, decay=0, amsgrad=False)
#adam = tf.keras.optimizers.Adam(learning_rate=0.001)
model.compile(loss='mse', optimizer=adam)
for epoch in range(num_epochs):
    print('Epoch {%d}' % (epoch))
    #model.fit(Xtrain, ytrain, epochs=1, batch_size=500, validation_data=(Xval, yVal), shuffle=True)
    history = model.fit(Xtrain, ytrain, epochs=1, batch_size=500, validation_data=(Xval, yVal))
    historyvet.append(history.history)
model.save('bilstm_predict_rul_experiment8_extended_full_take3.h5')
model.summary()
Epoch {0} 389/389 [==============================] - 340s 841ms/step - loss: 92.4284 - val_loss: 85.2481 Epoch {1} 389/389 [==============================] - 321s 825ms/step - loss: 85.1641 - val_loss: 85.7237 Epoch {2} 389/389 [==============================] - 323s 829ms/step - loss: 84.6595 - val_loss: 83.9631 Epoch {3} 194/389 [=============>................] - ETA: 2:26 - loss: 84.2638
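A hedged alternative to the manual one-epoch-at-a-time loop (not what was run here) is a single fit call with Keras callbacks, which keeps the best weights on disk and stops once the validation loss stalls; the checkpoint filename below is illustrative:
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping
callbacks = [ModelCheckpoint('bilstm_predict_rul_best.h5', monitor='val_loss', save_best_only=True),
             EarlyStopping(monitor='val_loss', patience=20, restore_best_weights=True)]
history = model.fit(Xtrain, ytrain, epochs=150, batch_size=500,
                    validation_data=(Xval, yVal), callbacks=callbacks)
# history.history['loss'] and history.history['val_loss'] then hold the full curves for plotting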
lossTrain=[]
lossval=[]
accTrain = []
accVal =[]
for element in historyvet:
lossTrain.append(element['loss'][0])
lossval.append(element['val_loss'][0])
#accTrain.append(element['accuracy'][0])
#accVal.append(element['val_accuracy'][0])
plt.plot(lossTrain, color='g')
plt.plot(lossval, color='r')
plt.title('model loss')
plt.xlabel('epoch')
plt.ylabel('Loss')
plt.legend(['train', 'validation'], loc='upper right')
plt.show()
# Skip this cell if the previous one finished and you are continuing in the same session.
# If the notebook was disconnected (e.g. after training overnight), run it to reload the saved model.
model = load_model( 'bilstm_predict_rul_experiment8_extended_full_take3.h5')
print(model.evaluate(Xtest,yTest))
1302/1302 [==============================] - 143s 108ms/step - loss: 67.4307 67.43070220947266
pred = model.predict(Xtest)
1302/1302 [==============================] - 138s 104ms/step
y = yTest.copy()
yhat = pred.copy()
# calculate manually
d = y - yhat
mse_f = np.mean(d**2)
mae_f = np.mean(abs(d))
rmse_f = np.sqrt(mse_f)
r2_f = 1-(sum(d**2)/sum((y-np.mean(y))**2))
print("Results by manual calculation:")
print("MAE:",mae_f) #mean absolute error - difference between the original and predicted extracted by avg the abs diff over dataset
print("MSE:", mse_f) #mean squared error - diff btw orig and pred extracted by squared the avg diff over the dataset
print("RMSE:", rmse_f) #root mean squared error - is the error rate by the square root of MSE
print("R-Squared:", r2_f) #coefficient of determination - the higher the better
Results by manual calculation: MAE: 6.714504805846707 MSE: 67.43068458432414 RMSE: 8.211618877196148 R-Squared: [0.20853765]
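The same figures can be cross-checked with the scikit-learn metrics already imported at the top of the notebook:
# Cross-check the manual calculation with scikit-learn
print("MAE: ", mean_absolute_error(yTest, pred))
print("RMSE:", np.sqrt(mean_squared_error(yTest, pred)))
print("R2:  ", r2_score(yTest, pred))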
More training, since both training and validation loss are still decreasing and the model looks like it can improve further.
#another 150 epochs to train
historyvet =[]
model = load_model( 'bilstm_predict_rul_experiment8_extended_full_take3.h5')
num_epochs = 150
for epoch in range(num_epochs):
print('Epoch {%d}' %(epoch))
history = model.fit(Xtrain, ytrain, epochs=1, batch_size=500, validation_data= (Xval,yVal), shuffle=True)
historyvet.append(history.history)
model.save('bilstm_predict_rul_experiment8_extended_full_take3_part2.h5')
model.summary()
Epoch {0} 389/389 [==============================] - 343s 862ms/step - loss: 64.2593 - val_loss: 68.3754 Epoch {1} 389/389 [==============================] - 332s 852ms/step - loss: 63.3964 - val_loss: 67.4172 Epoch {2} 389/389 [==============================] - 329s 847ms/step - loss: 63.3546 - val_loss: 67.3026 Epoch {3} 389/389 [==============================] - 332s 854ms/step - loss: 62.9733 - val_loss: 67.0207 Epoch {4} 389/389 [==============================] - 333s 856ms/step - loss: 62.8247 - val_loss: 67.2608 Epoch {5} 389/389 [==============================] - 329s 846ms/step - loss: 62.2602 - val_loss: 67.5695 Epoch {6} 389/389 [==============================] - 336s 862ms/step - loss: 62.5081 - val_loss: 66.4262 Epoch {7} 389/389 [==============================] - 332s 855ms/step - loss: 62.3149 - val_loss: 66.7445 Epoch {8} 389/389 [==============================] - 337s 866ms/step - loss: 61.9950 - val_loss: 66.2608 Epoch {9} 389/389 [==============================] - 334s 859ms/step - loss: 61.4956 - val_loss: 65.5214 Epoch {10} 389/389 [==============================] - 334s 859ms/step - loss: 61.2620 - val_loss: 65.0429 Epoch {11} 389/389 [==============================] - 334s 859ms/step - loss: 61.1446 - val_loss: 65.6227 Epoch {12} 389/389 [==============================] - 327s 841ms/step - loss: 60.8006 - val_loss: 65.3122 Epoch {13} 389/389 [==============================] - 331s 851ms/step - loss: 61.2606 - val_loss: 64.9975 Epoch {14} 389/389 [==============================] - 328s 842ms/step - loss: 60.2370 - val_loss: 64.4013 Epoch {15} 389/389 [==============================] - 329s 847ms/step - loss: 60.1839 - val_loss: 64.5551 Epoch {16} 389/389 [==============================] - 330s 849ms/step - loss: 59.7568 - val_loss: 64.6951 Epoch {17} 389/389 [==============================] - 332s 854ms/step - loss: 59.8941 - val_loss: 63.5398 Epoch {18} 389/389 [==============================] - 335s 862ms/step - loss: 59.6660 - val_loss: 64.0032 Epoch {19} 389/389 [==============================] - 332s 853ms/step - loss: 59.0889 - val_loss: 64.5364 Epoch {20} 389/389 [==============================] - 333s 857ms/step - loss: 58.7660 - val_loss: 63.8183 Epoch {21} 389/389 [==============================] - 333s 855ms/step - loss: 59.8411 - val_loss: 64.7533 Epoch {22} 389/389 [==============================] - 331s 852ms/step - loss: 58.6592 - val_loss: 63.8076 Epoch {23} 389/389 [==============================] - 332s 855ms/step - loss: 58.1071 - val_loss: 63.0844 Epoch {24} 389/389 [==============================] - 331s 852ms/step - loss: 57.9922 - val_loss: 63.0138 Epoch {25} 389/389 [==============================] - 334s 860ms/step - loss: 58.1064 - val_loss: 63.5760 Epoch {26} 389/389 [==============================] - 333s 856ms/step - loss: 57.3484 - val_loss: 62.2100 Epoch {27} 389/389 [==============================] - 332s 853ms/step - loss: 57.0489 - val_loss: 61.7560 Epoch {28} 389/389 [==============================] - 333s 856ms/step - loss: 56.7409 - val_loss: 61.9766 Epoch {29} 389/389 [==============================] - 329s 846ms/step - loss: 56.1090 - val_loss: 60.9149 Epoch {30} 389/389 [==============================] - 336s 864ms/step - loss: 56.4711 - val_loss: 60.8571 Epoch {31} 389/389 [==============================] - 331s 851ms/step - loss: 56.0427 - val_loss: 61.6544 Epoch {32} 389/389 [==============================] - 330s 849ms/step - loss: 57.5427 - val_loss: 66.4305 Epoch {33} 389/389 [==============================] - 330s 
848ms/step - loss: 56.3025 - val_loss: 59.7977 Epoch {34} 389/389 [==============================] - 328s 842ms/step - loss: 55.1212 - val_loss: 61.3966 Epoch {35} 389/389 [==============================] - 328s 843ms/step - loss: 54.9564 - val_loss: 59.9022 Epoch {36} 389/389 [==============================] - 327s 841ms/step - loss: 54.4106 - val_loss: 60.0068 Epoch {37} 389/389 [==============================] - 329s 845ms/step - loss: 54.4488 - val_loss: 59.5027 Epoch {38} 389/389 [==============================] - 330s 847ms/step - loss: 53.9621 - val_loss: 58.7557 Epoch {39} 389/389 [==============================] - 326s 839ms/step - loss: 53.7698 - val_loss: 58.6915 Epoch {40} 389/389 [==============================] - 327s 841ms/step - loss: 53.8168 - val_loss: 61.1053 Epoch {41} 389/389 [==============================] - 326s 838ms/step - loss: 54.2586 - val_loss: 61.5899 Epoch {42} 389/389 [==============================] - 329s 847ms/step - loss: 53.1145 - val_loss: 58.5002 Epoch {43} 389/389 [==============================] - 327s 841ms/step - loss: 52.4294 - val_loss: 58.4195 Epoch {44} 389/389 [==============================] - 327s 841ms/step - loss: 52.4108 - val_loss: 57.0662 Epoch {45} 389/389 [==============================] - 327s 840ms/step - loss: 52.5267 - val_loss: 57.3894 Epoch {46} 389/389 [==============================] - 327s 840ms/step - loss: 51.9211 - val_loss: 57.6161 Epoch {47} 389/389 [==============================] - 328s 844ms/step - loss: 52.1283 - val_loss: 56.9461 Epoch {48} 389/389 [==============================] - 327s 840ms/step - loss: 51.1085 - val_loss: 56.3701 Epoch {49} 389/389 [==============================] - 330s 848ms/step - loss: 50.8462 - val_loss: 57.8374 Epoch {50} 389/389 [==============================] - 326s 839ms/step - loss: 51.0079 - val_loss: 56.1585 Epoch {51} 389/389 [==============================] - 328s 842ms/step - loss: 50.0825 - val_loss: 56.3187 Epoch {52} 389/389 [==============================] - 325s 837ms/step - loss: 50.1658 - val_loss: 56.2422 Epoch {53} 389/389 [==============================] - 327s 841ms/step - loss: 49.6024 - val_loss: 55.4322 Epoch {54} 389/389 [==============================] - 329s 846ms/step - loss: 49.7728 - val_loss: 54.7449 Epoch {55} 389/389 [==============================] - 326s 838ms/step - loss: 49.6413 - val_loss: 54.7127 Epoch {56} 389/389 [==============================] - 329s 845ms/step - loss: 48.9819 - val_loss: 55.4102 Epoch {57} 389/389 [==============================] - 325s 836ms/step - loss: 48.8032 - val_loss: 55.7070 Epoch {58} 389/389 [==============================] - 326s 839ms/step - loss: 48.5199 - val_loss: 56.3114 Epoch {59} 389/389 [==============================] - 328s 844ms/step - loss: 47.7044 - val_loss: 54.0407 Epoch {60} 389/389 [==============================] - 327s 841ms/step - loss: 47.6278 - val_loss: 52.7045 Epoch {61} 389/389 [==============================] - 333s 855ms/step - loss: 47.4723 - val_loss: 53.4739 Epoch {62} 389/389 [==============================] - 326s 837ms/step - loss: 47.1001 - val_loss: 56.5136 Epoch {63} 389/389 [==============================] - 328s 842ms/step - loss: 48.0536 - val_loss: 55.6724 Epoch {64} 389/389 [==============================] - 329s 845ms/step - loss: 46.8253 - val_loss: 53.2679 Epoch {65} 389/389 [==============================] - 327s 841ms/step - loss: 46.7869 - val_loss: 52.4258 Epoch {66} 389/389 [==============================] - 328s 844ms/step - loss: 46.4162 - val_loss: 54.8050 Epoch 
{67} 389/389 [==============================] - 325s 836ms/step - loss: 45.9856 - val_loss: 52.2177 Epoch {68} 389/389 [==============================] - 326s 838ms/step - loss: 44.9784 - val_loss: 52.2360 Epoch {69} 389/389 [==============================] - ETA: 0s - loss: 44.7546
lossTrain=[]
lossval=[]
accTrain = []
accVal =[]
for element in historyvet:
lossTrain.append(element['loss'][0])
lossval.append(element['val_loss'][0])
#accTrain.append(element['accuracy'][0])
#accVal.append(element['val_accuracy'][0])
plt.plot(lossTrain, color='g')
plt.plot(lossval, color='r')
plt.title('model loss')
plt.xlabel('epoch')
plt.ylabel('Loss')
plt.legend(['train', 'validation'], loc='upper right')
plt.show()
pred = model.predict(Xtest)
1302/1302 [==============================] - 125s 94ms/step
print(model.evaluate(Xtest,yTest))
1302/1302 [==============================] - 126s 96ms/step - loss: 33.3588 33.35878372192383
y = yTest.copy()
yhat = pred.copy()
# calculate manually
d = y - yhat
mse_f = np.mean(d**2)
mae_f = np.mean(abs(d))
rmse_f = np.sqrt(mse_f)
r2_f = 1-(sum(d**2)/sum((y-np.mean(y))**2))
print("Results by manual calculation:")
print("MAE:",mae_f) #mean absolute error - difference between the original and predicted extracted by avg the abs diff over dataset
print("MSE:", mse_f) #mean squared error - diff btw orig and pred extracted by squared the avg diff over the dataset
print("RMSE:", rmse_f) #root mean squared error - is the error rate by the square root of MSE
print("R-Squared:", r2_f) #coefficient of determination - the higher the better
Results by manual calculation: MAE: 4.252981828318384 MSE: 33.35878218176548 RMSE: 5.775706206323646 R-Squared: [0.60845392]
# Plot true and predicted RUL values
plt.plot(yTest, label = "True RUL", color = "red")
plt.plot(pred, label = "Pred RUL", color = "blue")
plt.legend()
plt.show()
x = list(range(len(yTest)))
plt.scatter(x, yTest, color="blue", label="original")
plt.plot(x, pred, color="red", label="predicted")
plt.legend()
plt.show()
y = yTest.copy()
yhat = pred.copy()
# calculate manually
d = y - yhat
mse_f = np.mean(d**2)
mae_f = np.mean(abs(d))
rmse_f = np.sqrt(mse_f)
r2_f = 1-(sum(d**2)/sum((y-np.mean(y))**2))
print("Results by manual calculation:")
print("MAE:",mae_f) #mean absolute error - difference between the original and predicted extracted by avg the abs diff over dataset
print("MSE:", mse_f) #mean squared error - diff btw orig and pred extracted by squared the avg diff over the dataset
print("RMSE:", rmse_f) #root mean squared error - is the error rate by the square root of MSE
print("R-Squared:", r2_f) #coefficient of determination - the higher the better
Results by manual calculation: MAE: 4.252981828318384 MSE: 33.35878218176548 RMSE: 5.775706206323646 R-Squared: [0.60845392]
#another 150 epochs to train
historyvet =[]
model = load_model( 'bilstm_predict_rul_experiment8_extended_full_take3_part2.h5')
num_epochs = 150
for epoch in range(num_epochs):
print('Epoch {%d}' %(epoch))
history = model.fit(Xtrain, ytrain, epochs=1, batch_size=500, validation_data= (Xval,yVal), shuffle=True)
historyvet.append(history.history)
model.save('bilstm_predict_rul_experiment8_extended_full_take3_part3.h5')
model.summary()
Epoch {0} 389/389 [==============================] - 338s 853ms/step - loss: 25.8730 - val_loss: 36.7638 Epoch {1} 389/389 [==============================] - 330s 847ms/step - loss: 28.3172 - val_loss: 33.4057 Epoch {2} 389/389 [==============================] - 328s 842ms/step - loss: 27.1991 - val_loss: 35.4056 Epoch {3} 389/389 [==============================] - 328s 844ms/step - loss: 27.9181 - val_loss: 34.8644 Epoch {4} 389/389 [==============================] - 328s 843ms/step - loss: 26.4577 - val_loss: 37.4612 Epoch {5} 389/389 [==============================] - 328s 843ms/step - loss: 26.3098 - val_loss: 35.6996 Epoch {6} 389/389 [==============================] - 331s 851ms/step - loss: 24.5580 - val_loss: 30.9044 Epoch {7} 389/389 [==============================] - 328s 843ms/step - loss: 23.9100 - val_loss: 36.0782 Epoch {8} 389/389 [==============================] - 331s 851ms/step - loss: 40.0271 - val_loss: 38.7793 Epoch {9} 389/389 [==============================] - 326s 838ms/step - loss: 27.4546 - val_loss: 36.4517 Epoch {10} 389/389 [==============================] - 328s 843ms/step - loss: 27.2428 - val_loss: 33.3448 Epoch {11} 389/389 [==============================] - 328s 843ms/step - loss: 37.9628 - val_loss: 64.6191 Epoch {12} 389/389 [==============================] - 327s 840ms/step - loss: 45.6755 - val_loss: 42.1760 Epoch {13} 389/389 [==============================] - 330s 849ms/step - loss: 29.6218 - val_loss: 33.5977 Epoch {14} 389/389 [==============================] - 326s 838ms/step - loss: 25.4958 - val_loss: 33.2421 Epoch {15} 389/389 [==============================] - 329s 845ms/step - loss: 30.3199 - val_loss: 45.8646 Epoch {16} 389/389 [==============================] - 327s 840ms/step - loss: 34.3555 - val_loss: 38.6681 Epoch {17} 389/389 [==============================] - 328s 842ms/step - loss: 29.1423 - val_loss: 53.5323 Epoch {18} 389/389 [==============================] - 328s 843ms/step - loss: 27.6896 - val_loss: 32.0927 Epoch {19} 389/389 [==============================] - 328s 844ms/step - loss: 23.5051 - val_loss: 33.8288 Epoch {20} 389/389 [==============================] - 329s 845ms/step - loss: 24.4657 - val_loss: 33.2159 Epoch {21} 389/389 [==============================] - 328s 842ms/step - loss: 25.3128 - val_loss: 33.6188 Epoch {22} 389/389 [==============================] - 329s 845ms/step - loss: 24.3259 - val_loss: 32.3556 Epoch {23} 389/389 [==============================] - 326s 839ms/step - loss: 22.8256 - val_loss: 49.4982 Epoch {24} 389/389 [==============================] - 329s 846ms/step - loss: 24.0169 - val_loss: 28.7486 Epoch {25} 389/389 [==============================] - 328s 844ms/step - loss: 48.0929 - val_loss: 59.0893 Epoch {26} 389/389 [==============================] - 325s 836ms/step - loss: 29.6776 - val_loss: 30.8032 Epoch {27} 389/389 [==============================] - 328s 843ms/step - loss: 23.2279 - val_loss: 30.0722 Epoch {28} 389/389 [==============================] - 327s 842ms/step - loss: 21.6707 - val_loss: 31.5854 Epoch {29} 389/389 [==============================] - 328s 842ms/step - loss: 21.3462 - val_loss: 31.1337 Epoch {30} 389/389 [==============================] - 328s 842ms/step - loss: 22.2737 - val_loss: 30.3173 Epoch {31} 389/389 [==============================] - 327s 841ms/step - loss: 23.2598 - val_loss: 30.5546 Epoch {32} 389/389 [==============================] - 330s 849ms/step - loss: 21.1547 - val_loss: 30.9380 Epoch {33} 389/389 [==============================] - 328s 
843ms/step - loss: 22.6806 - val_loss: 29.1014 Epoch {34} 389/389 [==============================] - 327s 840ms/step - loss: 20.5276 - val_loss: 28.6941 Epoch {35} 389/389 [==============================] - 328s 842ms/step - loss: 20.4221 - val_loss: 27.7424 Epoch {36} 389/389 [==============================] - 325s 836ms/step - loss: 25.7414 - val_loss: 32.6389 Epoch {37} 389/389 [==============================] - 328s 842ms/step - loss: 20.8044 - val_loss: 39.1481 Epoch {38} 389/389 [==============================] - 329s 845ms/step - loss: 21.4919 - val_loss: 28.2595 Epoch {39} 389/389 [==============================] - 326s 838ms/step - loss: 22.0989 - val_loss: 29.1410 Epoch {40} 389/389 [==============================] - 330s 847ms/step - loss: 20.5530 - val_loss: 27.6230 Epoch {41} 389/389 [==============================] - 326s 838ms/step - loss: 21.6128 - val_loss: 30.0240 Epoch {42} 389/389 [==============================] - 326s 838ms/step - loss: 21.1913 - val_loss: 28.2491 Epoch {43} 124/389 [========>.....................] - ETA: 3:22 - loss: 18.5470
IOPub message rate exceeded. The notebook server will temporarily stop sending output to the client in order to avoid crashing it. To change this limit, set the config variable `--NotebookApp.iopub_msg_rate_limit`. Current values: NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec) NotebookApp.rate_limit_window=3.0 (secs)
389/389 [==============================] - 331s 850ms/step - loss: 15.3658 - val_loss: 22.2062 Epoch {90} 389/389 [==============================] - 329s 846ms/step - loss: 14.1163 - val_loss: 23.0011 Epoch {91} 389/389 [==============================] - 330s 848ms/step - loss: 14.3330 - val_loss: 22.2124 Epoch {92} 100/389 [======>.......................] - ETA: 3:49 - loss: 13.7512
lossTrain=[]
lossval=[]
accTrain = []
accVal =[]
for element in historyvet:
lossTrain.append(element['loss'][0])
lossval.append(element['val_loss'][0])
#accTrain.append(element['accuracy'][0])
#accVal.append(element['val_accuracy'][0])
plt.plot(lossTrain, color='g')
plt.plot(lossval, color='r')
plt.title('model loss')
plt.xlabel('epoch')
plt.ylabel('Loss')
plt.legend(['train', 'validation'], loc='upper right')
plt.show()
pred = model.predict(Xtest)
1302/1302 [==============================] - 135s 101ms/step
print(model.evaluate(Xtest,yTest))
1302/1302 [==============================] - 133s 102ms/step - loss: 17.9300 17.929988861083984
y = yTest.copy()
yhat = pred.copy()
# calculate manually
d = y - yhat
mse_f = np.mean(d**2)
mae_f = np.mean(abs(d))
rmse_f = np.sqrt(mse_f)
r2_f = 1-(sum(d**2)/sum((y-np.mean(y))**2))
print("Results by manual calculation:")
print("MAE:",mae_f) #mean absolute error - difference between the original and predicted extracted by avg the abs diff over dataset
print("MSE:", mse_f) #mean squared error - diff btw orig and pred extracted by squared the avg diff over the dataset
print("RMSE:", rmse_f) #root mean squared error - is the error rate by the square root of MSE
print("R-Squared:", r2_f) #coefficient of determination - the higher the better
Results by manual calculation: MAE: 3.026280620487356 MSE: 17.929977768579597 RMSE: 4.234380446839844 R-Squared: [0.7895483]
# Plot true and predicted RUL values
plt.plot(yTest, label = "True RUL", color = "red")
plt.plot(pred, label = "Pred RUL", color = "blue")
plt.legend()
plt.show()
x = list(range(len(yTest)))
plt.scatter(x, yTest, color="blue", label="original")
plt.plot(x, pred, color="red", label="predicted")
plt.legend()
plt.show()
#another 50 epochs to train
historyvet =[]
model = load_model( 'bilstm_predict_rul_experiment8_extended_full_take3_part3.h5')
num_epochs = 50
for epoch in range(num_epochs):
print('Epoch {%d}' %(epoch))
history = model.fit(Xtrain, ytrain, epochs=1, batch_size=500, validation_data= (Xval,yVal), shuffle=True)
historyvet.append(history.history)
model.save('bilstm_predict_rul_experiment8_extended_full_take3_part4.h5')
model.summary()
Epoch {0} 389/389 [==============================] - 341s 860ms/step - loss: 9.9645 - val_loss: 18.2545 Epoch {1} 389/389 [==============================] - 330s 849ms/step - loss: 10.0845 - val_loss: 19.1041 Epoch {2} 389/389 [==============================] - 332s 852ms/step - loss: 10.3525 - val_loss: 19.0241 Epoch {3} 389/389 [==============================] - 330s 847ms/step - loss: 11.2895 - val_loss: 16.3651 Epoch {4} 389/389 [==============================] - 332s 853ms/step - loss: 12.9427 - val_loss: 19.6775 Epoch {5} 389/389 [==============================] - 334s 858ms/step - loss: 10.2216 - val_loss: 18.1022 Epoch {6} 389/389 [==============================] - 330s 849ms/step - loss: 9.6090 - val_loss: 18.8817 Epoch {7} 389/389 [==============================] - 333s 855ms/step - loss: 9.6592 - val_loss: 19.6096 Epoch {8} 389/389 [==============================] - 330s 847ms/step - loss: 15.4943 - val_loss: 18.0857 Epoch {9} 389/389 [==============================] - 344s 886ms/step - loss: 10.0890 - val_loss: 18.4702 Epoch {10} 389/389 [==============================] - 333s 855ms/step - loss: 10.9185 - val_loss: 18.3299 Epoch {11} 389/389 [==============================] - 332s 855ms/step - loss: 9.8375 - val_loss: 17.2769 Epoch {12} 389/389 [==============================] - 334s 858ms/step - loss: 9.0572 - val_loss: 18.4719 Epoch {13} 389/389 [==============================] - 330s 848ms/step - loss: 9.1006 - val_loss: 16.8966 Epoch {14} 389/389 [==============================] - 333s 855ms/step - loss: 9.3132 - val_loss: 19.2194 Epoch {15} 389/389 [==============================] - 330s 849ms/step - loss: 10.1103 - val_loss: 17.0467 Epoch {16} 389/389 [==============================] - 332s 854ms/step - loss: 10.6557 - val_loss: 18.4849 Epoch {17} 389/389 [==============================] - 332s 853ms/step - loss: 9.1632 - val_loss: 17.1429 Epoch {18} 389/389 [==============================] - 331s 850ms/step - loss: 8.9634 - val_loss: 16.3619 Epoch {19} 389/389 [==============================] - 331s 852ms/step - loss: 15.5038 - val_loss: 19.0220 Epoch {20} 389/389 [==============================] - 330s 850ms/step - loss: 9.1171 - val_loss: 16.2541 Epoch {21} 389/389 [==============================] - 334s 859ms/step - loss: 8.4189 - val_loss: 16.1693 Epoch {22} 389/389 [==============================] - 333s 857ms/step - loss: 8.5636 - val_loss: 20.3592 Epoch {23} 389/389 [==============================] - 334s 859ms/step - loss: 9.1997 - val_loss: 17.8628 Epoch {24} 389/389 [==============================] - 333s 856ms/step - loss: 9.2838 - val_loss: 17.8284 Epoch {25} 389/389 [==============================] - 334s 859ms/step - loss: 8.8097 - val_loss: 18.5538 Epoch {26} 389/389 [==============================] - 333s 856ms/step - loss: 15.5939 - val_loss: 18.6431 Epoch {27} 389/389 [==============================] - 336s 863ms/step - loss: 12.9848 - val_loss: 26.0683 Epoch {28} 389/389 [==============================] - 341s 876ms/step - loss: 18.6866 - val_loss: 17.9619 Epoch {29} 389/389 [==============================] - 336s 865ms/step - loss: 8.7301 - val_loss: 15.2111 Epoch {30} 389/389 [==============================] - 332s 854ms/step - loss: 8.1547 - val_loss: 16.6626 Epoch {31} 389/389 [==============================] - 332s 852ms/step - loss: 8.0667 - val_loss: 16.3300 Epoch {32} 389/389 [==============================] - 335s 860ms/step - loss: 9.6312 - val_loss: 18.5143 Epoch {33} 389/389 [==============================] - 338s 868ms/step - loss: 8.8509 
- val_loss: 16.0280 Epoch {34} 389/389 [==============================] - 333s 856ms/step - loss: 9.1759 - val_loss: 19.9139 Epoch {35} 389/389 [==============================] - 335s 860ms/step - loss: 9.0047 - val_loss: 16.0639 Epoch {36} 389/389 [==============================] - 333s 855ms/step - loss: 7.9623 - val_loss: 17.3039 Epoch {37} 389/389 [==============================] - 335s 861ms/step - loss: 8.1814 - val_loss: 17.1028 Epoch {38} 389/389 [==============================] - 335s 861ms/step - loss: 8.6306 - val_loss: 18.1676 Epoch {39} 389/389 [==============================] - 335s 861ms/step - loss: 8.6266 - val_loss: 16.3168 Epoch {40} 389/389 [==============================] - 339s 869ms/step - loss: 8.3986 - val_loss: 16.0175 Epoch {41} 389/389 [==============================] - 335s 860ms/step - loss: 8.4103 - val_loss: 17.0997 Epoch {42} 389/389 [==============================] - 336s 864ms/step - loss: 9.0861 - val_loss: 16.6194 Epoch {43} 389/389 [==============================] - 334s 860ms/step - loss: 8.8549 - val_loss: 16.9267 Epoch {44} 389/389 [==============================] - 333s 857ms/step - loss: 8.1499 - val_loss: 15.8692 Epoch {45} 389/389 [==============================] - 337s 866ms/step - loss: 8.9060 - val_loss: 17.0316 Epoch {46} 389/389 [==============================] - 334s 857ms/step - loss: 8.2583 - val_loss: 17.9719 Epoch {47} 389/389 [==============================] - 335s 861ms/step - loss: 8.8649 - val_loss: 15.9925 Epoch {48} 389/389 [==============================] - 335s 861ms/step - loss: 8.3989 - val_loss: 16.3942 Epoch {49} 389/389 [==============================] - 336s 863ms/step - loss: 8.0296 - val_loss: 15.1184 Model: "sequential" _________________________________________________________________ Layer (type) Output Shape Param # ================================================================= bidirectional (Bidirection (None, 90, 256) 141312 al) bidirectional_1 (Bidirecti (None, 90, 128) 164352 onal) bidirectional_2 (Bidirecti (None, 64) 41216 onal) dense (Dense) (None, 96) 6240 dense_1 (Dense) (None, 128) 12416 dense_2 (Dense) (None, 1) 129 ================================================================= Total params: 365665 (1.39 MB) Trainable params: 365665 (1.39 MB) Non-trainable params: 0 (0.00 Byte) _________________________________________________________________
lossTrain=[]
lossval=[]
accTrain = []
accVal =[]
for element in historyvet:
lossTrain.append(element['loss'][0])
lossval.append(element['val_loss'][0])
#accTrain.append(element['accuracy'][0])
#accVal.append(element['val_accuracy'][0])
plt.plot(lossTrain, color='g')
plt.plot(lossval, color='r')
plt.title('model loss')
plt.xlabel('epoch')
plt.ylabel('Loss')
plt.legend(['train', 'validation'], loc='upper right')
plt.show()
pred = model.predict(Xtest)
1302/1302 [==============================] - 138s 104ms/step
print(model.evaluate(Xtest,yTest))
1302/1302 [==============================] - 136s 104ms/step - loss: 14.4658 14.465767860412598
y = yTest.copy()
yhat = pred.copy()
# calculate manually
d = y - yhat
mse_f = np.mean(d**2)
mae_f = np.mean(abs(d))
rmse_f = np.sqrt(mse_f)
r2_f = 1-(sum(d**2)/sum((y-np.mean(y))**2))
print("Results by manual calculation:")
print("MAE:",mae_f) #mean absolute error - difference between the original and predicted extracted by avg the abs diff over dataset
print("MSE:", mse_f) #mean squared error - diff btw orig and pred extracted by squared the avg diff over the dataset
print("RMSE:", rmse_f) #root mean squared error - is the error rate by the square root of MSE
print("R-Squared:", r2_f) #coefficient of determination - the higher the better
Results by manual calculation: MAE: 2.66739492077735 MSE: 14.465755477077103 RMSE: 3.8033873687907604 R-Squared: [0.83020934]
# Plot true and predicted RUL values
plt.plot(yTest, label = "True RUL", color = "red")
plt.plot(pred, label = "Pred RUL", color = "blue")
plt.legend()
plt.show()
x = list(range(len(yTest)))
plt.scatter(x, yTest, color="blue", label="original")
plt.plot(x, pred, color="red", label="predicted")
plt.legend()
plt.show()
Fine tune
newModel = load_model( 'bilstm_predict_rul_experiment8_extended_full_take3_part4.h5')
#adam = optimizers.Adam(lr=0.00001, beta_1=0.9, beta_2=0.999, epsilon=None, decay=0.0, amsgrad=False)
adam=tf.keras.optimizers.Adam(learning_rate=0.001)
newModel.compile(loss='mse', optimizer=adam)
newModel.summary()
epochs = 25
# Note: this fine-tuning pass fits on the validation split (Xval, yVal), not on new data.
newModel.fit(Xval, yVal, epochs=epochs, batch_size=500)
newModel.save('bilstm_predict_rul_experiment8_extended_full_take3_part4_best.h5')
newModel.summary()
Model: "sequential" _________________________________________________________________ Layer (type) Output Shape Param # ================================================================= bidirectional (Bidirection (None, 90, 256) 141312 al) bidirectional_1 (Bidirecti (None, 90, 128) 164352 onal) bidirectional_2 (Bidirecti (None, 64) 41216 onal) dense (Dense) (None, 96) 6240 dense_1 (Dense) (None, 128) 12416 dense_2 (Dense) (None, 1) 129 ================================================================= Total params: 365665 (1.39 MB) Trainable params: 365665 (1.39 MB) Non-trainable params: 0 (0.00 Byte) _________________________________________________________________ Epoch 1/25 84/84 [==============================] - 78s 773ms/step - loss: 18.8138 Epoch 2/25 84/84 [==============================] - 65s 775ms/step - loss: 15.1091 Epoch 3/25 84/84 [==============================] - 66s 780ms/step - loss: 23.6433 Epoch 4/25 84/84 [==============================] - 65s 779ms/step - loss: 15.5591 Epoch 5/25 84/84 [==============================] - 65s 772ms/step - loss: 12.9244 Epoch 6/25 84/84 [==============================] - 66s 783ms/step - loss: 11.1757 Epoch 7/25 84/84 [==============================] - 65s 775ms/step - loss: 10.3805 Epoch 8/25 84/84 [==============================] - 65s 778ms/step - loss: 9.4869 Epoch 9/25 84/84 [==============================] - 64s 768ms/step - loss: 8.9791 Epoch 10/25 84/84 [==============================] - 65s 776ms/step - loss: 8.6154 Epoch 11/25 84/84 [==============================] - 65s 777ms/step - loss: 8.4584 Epoch 12/25 84/84 [==============================] - 65s 775ms/step - loss: 10.6611 Epoch 13/25 84/84 [==============================] - 65s 767ms/step - loss: 8.3051 Epoch 14/25 84/84 [==============================] - 65s 769ms/step - loss: 9.2540 Epoch 15/25 84/84 [==============================] - 65s 770ms/step - loss: 8.8023 Epoch 16/25 84/84 [==============================] - 65s 775ms/step - loss: 7.9576 Epoch 17/25 84/84 [==============================] - 64s 767ms/step - loss: 6.7822 Epoch 18/25 84/84 [==============================] - 65s 771ms/step - loss: 6.8865 Epoch 19/25 84/84 [==============================] - 64s 767ms/step - loss: 7.3095 Epoch 20/25 84/84 [==============================] - 65s 772ms/step - loss: 7.3847 Epoch 21/25 84/84 [==============================] - 65s 779ms/step - loss: 7.3081 Epoch 22/25 84/84 [==============================] - 65s 775ms/step - loss: 16.4596 Epoch 23/25 84/84 [==============================] - 65s 771ms/step - loss: 14.7284 Epoch 24/25 84/84 [==============================] - 66s 780ms/step - loss: 9.3644 Epoch 25/25 84/84 [==============================] - 65s 778ms/step - loss: 6.8536 Model: "sequential" _________________________________________________________________ Layer (type) Output Shape Param # ================================================================= bidirectional (Bidirection (None, 90, 256) 141312 al) bidirectional_1 (Bidirecti (None, 90, 128) 164352 onal) bidirectional_2 (Bidirecti (None, 64) 41216 onal) dense (Dense) (None, 96) 6240 dense_1 (Dense) (None, 128) 12416 dense_2 (Dense) (None, 1) 129 ================================================================= Total params: 365665 (1.39 MB) Trainable params: 365665 (1.39 MB) Non-trainable params: 0 (0.00 Byte) _________________________________________________________________
print(newModel.evaluate(Xtest,yTest))
1302/1302 [==============================] - 135s 102ms/step - loss: 16.8626 16.862579345703125
pred = newModel.predict(Xtest)
1302/1302 [==============================] - 132s 99ms/step
# Plot true and predicted RUL values
plt.plot(yTest, label = "True RUL", color = "red")
plt.plot(pred, label = "Pred RUL", color = "blue")
plt.legend()
plt.show()
x = list(range(len(yTest)))
plt.scatter(x, yTest, color="blue", label="original")
plt.plot(x, pred, color="red", label="predicted")
plt.legend()
plt.show()
y = yTest.copy()
yhat = pred.copy()
# calculate manually
d = y - yhat
mse_f = np.mean(d**2)
mae_f = np.mean(abs(d))
rmse_f = np.sqrt(mse_f)
r2_f = 1-(sum(d**2)/sum((y-np.mean(y))**2))
print("Results by manual calculation:")
print("MAE:",mae_f) #mean absolute error - difference between the original and predicted extracted by avg the abs diff over dataset
print("MSE:", mse_f) #mean squared error - diff btw orig and pred extracted by squared the avg diff over the dataset
print("RMSE:", rmse_f) #root mean squared error - is the error rate by the square root of MSE
print("R-Squared:", r2_f) #coefficient of determination - the higher the better - in my case >85% after training on the val dataset
Results by manual calculation: MAE: 2.947035403738186 MSE: 16.86256980900072 RMSE: 4.106405947906358 R-Squared: [0.80207692]
results = pd.DataFrame({'Predicted':pred.flatten()})
results['Actual'] = yTest.flatten()
results
Predicted | Actual | |
---|---|---|
0 | 14.199033 | 15 |
1 | 13.143090 | 13 |
2 | 4.791924 | 1 |
3 | 14.558222 | 19 |
4 | 13.416628 | 15 |
... | ... | ... |
41628 | 26.579113 | 23 |
41629 | 19.725632 | 12 |
41630 | 21.202875 | 27 |
41631 | 4.445514 | 3 |
41632 | 12.522565 | 15 |
41633 rows × 2 columns
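As a final sketch (using the Predicted/Actual frame above), the mean absolute error can be broken down by the true remaining useful life to see at which horizons the model is weakest:
# Mean absolute error per true RUL value
results['abs_error'] = (results['Predicted'] - results['Actual']).abs()
results.groupby('Actual')['abs_error'].mean().plot(kind='bar', figsize=(12, 4))
plt.xlabel('True RUL (days)')
plt.ylabel('Mean absolute error (days)')
plt.show()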