In [42]:
#import the relevant libraries 
import os
import pymysql
import pandas as pd
In [43]:
#establish the connection to the mysql database
host = "192.168.88.187"
port = "3306"
user = "backblaze"
password = "Testing.2023"
database = "backblaze_ml"

conn = pymysql.connect(
    host=host,
    port=int(3306),
    user=user,
    passwd=password,
    db=database,
    charset='utf8mb4')
In [44]:
#take a look at the data
sqldf = pd.read_sql_query("select * from drive_stats limit 10", conn)
sqldf
Out[44]:
date serial_number model capacity_bytes days_to_failure failure smart_5_raw smart_187_raw smart_188_raw smart_189_raw smart_196_raw smart_197_raw smart_198_raw
0 2013-04-10 W1F0LRXG ST3000DM001 3000592982016 460 0 0 None None None None 8 None
1 2013-04-10 W1F09LPX ST3000DM001 3000592982016 330 0 0 None None None None 0 None
2 2013-04-10 W1F08WP1 ST3000DM001 3000592982016 546 0 0 None None None None 0 None
3 2013-04-10 9XW01ZT9 ST31500541AS 1500301910016 882 0 0 None None None None 0 None
4 2013-04-10 9VS3JYRG ST31500341AS 1500301910016 343 0 2 None None None None 0 None
5 2013-04-10 MJ1311YNG36USA Hitachi HDS5C3030ALA630 3000592982016 1316 0 63 None None None None 0 None
6 2013-04-10 WD-WMC1T1203338 WDC WD30EFRX 3000592982016 1049 0 0 None None None None 0 None
7 2013-04-10 6XW05CJ6 ST31500541AS 1500301910016 99 0 25 None None None None 0 None
8 2013-04-10 9VS3HR6E ST31500341AS 1500301910016 754 0 29 None None None None 0 None
9 2013-04-10 5XW0CBN7 ST32000542AS 2000398934016 42 0 24 None None None None 0 None
In [45]:
sqldf.shape
Out[45]:
(10, 13)
In [46]:
sqldf.columns
Out[46]:
Index(['date', 'serial_number', 'model', 'capacity_bytes', 'days_to_failure',
       'failure', 'smart_5_raw', 'smart_187_raw', 'smart_188_raw',
       'smart_189_raw', 'smart_196_raw', 'smart_197_raw', 'smart_198_raw'],
      dtype='object')
In [47]:
import datetime

sqldf.date = pd.to_datetime(sqldf.date, format='%Y-%m-%d').dt.date
In [48]:
from IPython.display import display
pd.options.display.max_columns = None
pd.options.display.max_rows = None
df = pd.read_sql_query("describe drive_stats", conn)
df
Out[48]:
Field Type Null Key Default Extra
0 date date YES MUL None
1 serial_number varchar(64) YES MUL None
2 model varchar(64) YES MUL None
3 capacity_bytes bigint YES MUL None
4 days_to_failure int YES None
5 failure int YES MUL None
6 smart_5_raw bigint YES None
7 smart_187_raw bigint YES None
8 smart_188_raw bigint YES None
9 smart_189_raw bigint YES None
10 smart_196_raw bigint YES None
11 smart_197_raw bigint YES None
12 smart_198_raw bigint YES None
In [49]:
pd.options.display.max_columns = 100
pd.options.display.max_rows = 100

#take a bigger look at the data -> this time selecting only the raw tables and excluding the ones not relevant for correlation
#df = pd.read_sql_query("select date, serial_number, model, smart_1_raw,smart_2_raw,smart_3_raw,smart_4_raw,smart_5_raw,smart_7_raw,smart_8_raw,smart_9_raw,smart_10_raw,smart_11_raw,smart_12_raw,smart_13_raw,smart_15_raw,smart_16_raw,smart_17_raw,smart_18_raw,smart_22_raw,smart_23_raw,smart_24_raw,smart_160_raw,smart_161_raw,smart_163_raw,smart_164_raw,smart_165_raw,smart_166_raw,smart_167_raw,smart_168_raw,smart_169_raw,smart_170_raw,smart_171_raw,smart_172_raw,smart_173_raw,smart_174_raw,smart_175_raw,smart_176_raw,smart_177_raw,smart_178_raw,smart_179_raw,smart_180_raw,smart_181_raw,smart_182_raw,smart_183_raw,smart_184_raw,smart_187_raw,smart_188_raw,smart_189_raw,smart_190_raw,smart_191_raw,smart_192_raw,smart_193_raw,smart_194_raw,smart_195_raw,smart_196_raw,smart_197_raw,smart_198_raw,smart_199_raw,smart_200_raw,smart_201_raw,smart_202_raw,smart_206_raw,smart_210_raw,smart_218_raw,smart_220_raw,smart_222_raw,smart_223_raw,smart_224_raw,smart_225_raw,smart_226_raw,smart_230_raw,smart_231_raw,smart_232_raw,smart_233_raw,smart_234_raw,smart_235_raw,smart_240_raw,smart_241_raw,smart_242_raw,smart_244_raw,smart_245_raw,smart_246_raw,smart_247_raw,smart_248_raw,smart_250_raw,smart_251_raw,smart_252_raw,smart_254_raw,smart_255_raw, failure from drive_stats limit 1000", conn)
df = pd.read_sql_query("select * from drive_stats", conn)
#df.tail(10)
df
Out[49]:
date serial_number model capacity_bytes days_to_failure failure smart_5_raw smart_187_raw smart_188_raw smart_189_raw smart_196_raw smart_197_raw smart_198_raw
0 2013-04-10 W1F0LRXG ST3000DM001 3000592982016 460 0 0.0 NaN NaN NaN NaN 8.0 NaN
1 2013-04-10 W1F09LPX ST3000DM001 3000592982016 330 0 0.0 NaN NaN NaN NaN 0.0 NaN
2 2013-04-10 W1F08WP1 ST3000DM001 3000592982016 546 0 0.0 NaN NaN NaN NaN 0.0 NaN
3 2013-04-10 9XW01ZT9 ST31500541AS 1500301910016 882 0 0.0 NaN NaN NaN NaN 0.0 NaN
4 2013-04-10 9VS3JYRG ST31500341AS 1500301910016 343 0 2.0 NaN NaN NaN NaN 0.0 NaN
5 2013-04-10 MJ1311YNG36USA Hitachi HDS5C3030ALA630 3000592982016 1316 0 63.0 NaN NaN NaN NaN 0.0 NaN
6 2013-04-10 WD-WMC1T1203338 WDC WD30EFRX 3000592982016 1049 0 0.0 NaN NaN NaN NaN 0.0 NaN
7 2013-04-10 6XW05CJ6 ST31500541AS 1500301910016 99 0 25.0 NaN NaN NaN NaN 0.0 NaN
8 2013-04-10 9VS3HR6E ST31500341AS 1500301910016 754 0 29.0 NaN NaN NaN NaN 0.0 NaN
9 2013-04-10 5XW0CBN7 ST32000542AS 2000398934016 42 0 24.0 NaN NaN NaN NaN 0.0 NaN
10 2013-04-10 W1F03CMF ST3000DM001 3000592982016 356 0 0.0 NaN NaN NaN NaN 0.0 NaN
11 2013-04-10 MJ1311YNG733NA Hitachi HDS5C3030ALA630 3000592982016 1165 0 0.0 NaN NaN NaN NaN 0.0 NaN
12 2013-04-10 S1F091BQ ST3000DM001 3000592982016 478 0 0.0 NaN NaN NaN NaN 0.0 NaN
13 2013-04-10 S1F0KZVZ ST3000DM001 3000592982016 474 0 0.0 NaN NaN NaN NaN 0.0 NaN
14 2013-04-10 9XW02HF6 ST31500541AS 1500301910016 622 0 0.0 NaN NaN NaN NaN 0.0 NaN
15 2013-04-10 S1F03G0F ST3000DM001 3000592982016 421 0 0.0 NaN NaN NaN NaN 0.0 NaN
16 2013-04-10 S1F059PD ST3000DM001 3000592982016 463 0 0.0 NaN NaN NaN NaN 0.0 NaN
17 2013-04-10 WD-WCAV5M690585 WDC WD10EADS 1000204886016 734 0 0.0 NaN NaN NaN NaN 0.0 NaN
18 2013-04-10 S1F06HX8 ST3000DM001 3000592982016 447 0 0.0 NaN NaN NaN NaN 0.0 NaN
19 2013-04-10 6XW0DP29 ST31500541AS 1500301910016 288 0 0.0 NaN NaN NaN NaN 0.0 NaN
20 2013-04-10 9VS1W7EN ST31500341AS 1500301910016 43 0 505.0 NaN NaN NaN NaN 0.0 NaN
21 2013-04-10 MJ1323YNG1PE0C Hitachi HDS5C3030ALA630 3000592982016 921 0 0.0 NaN NaN NaN NaN 0.0 NaN
22 2013-04-10 WD-WCAU44965108 WDC WD10EADS 1000204886016 27 0 0.0 NaN NaN NaN NaN 0.0 NaN
23 2013-04-10 W1F09Z2W ST3000DM001 3000592982016 352 0 0.0 NaN NaN NaN NaN 0.0 NaN
24 2013-04-10 W1F0918M ST3000DM001 3000592982016 267 0 0.0 NaN NaN NaN NaN 0.0 NaN
25 2013-04-10 W1F0L2EW ST3000DM001 3000592982016 428 0 0.0 NaN NaN NaN NaN 0.0 NaN
26 2013-04-10 S1F0CSW2 ST3000DM001 3000592982016 554 0 0.0 NaN NaN NaN NaN 0.0 NaN
27 2013-04-10 S1F0CSW4 ST3000DM001 3000592982016 51 0 0.0 NaN NaN NaN NaN 0.0 NaN
28 2013-04-10 6XW01RJV ST31500541AS 1500301910016 523 0 0.0 NaN NaN NaN NaN 0.0 NaN
29 2013-04-10 Z1F11NTL ST3000DM001 3000592982016 540 0 0.0 NaN NaN NaN NaN 0.0 NaN
30 2013-04-10 Z1Z03G7Q ST4000DX000 4000787030016 1152 0 0.0 NaN NaN NaN NaN 0.0 NaN
31 2013-04-10 MK0311YHGK1G7A Hitachi HDS723030ALA640 3000592982016 440 0 0.0 NaN NaN NaN NaN 0.0 NaN
32 2013-04-10 S1F0A743 ST3000DM001 3000592982016 460 0 0.0 NaN NaN NaN NaN 0.0 NaN
33 2013-04-10 S1F04CTZ ST3000DM001 3000592982016 470 0 0.0 NaN NaN NaN NaN 0.0 NaN
34 2013-04-10 5XW0M6QE ST32000542AS 2000398934016 188 0 0.0 NaN NaN NaN NaN 0.0 NaN
35 2013-04-10 W1F0AZTN ST3000DM001 3000592982016 444 0 0.0 NaN NaN NaN NaN 0.0 NaN
36 2013-04-10 S1F05QHK ST3000DM001 3000592982016 253 0 0.0 NaN NaN NaN NaN 8.0 NaN
37 2013-04-10 W1F0B4S8 ST3000DM001 3000592982016 345 0 0.0 NaN NaN NaN NaN 0.0 NaN
38 2013-04-10 S1F01J8B ST3000DM001 3000592982016 516 0 0.0 NaN NaN NaN NaN 0.0 NaN
39 2013-04-10 S1F00MNJ ST3000DM001 3000592982016 377 0 0.0 NaN NaN NaN NaN 0.0 NaN
40 2013-04-10 MN3221F30DB21E Hitachi HDS723020BLA642 2000398934016 740 0 1.0 NaN NaN NaN NaN 0.0 NaN
41 2013-04-10 S1F01VER ST3000DM001 3000592982016 495 0 0.0 NaN NaN NaN NaN 0.0 NaN
42 2013-04-10 W1F0VZRX ST3000DM001 3000592982016 490 0 0.0 NaN NaN NaN NaN 0.0 NaN
43 2013-04-10 W1F0A479 ST3000DM001 3000592982016 474 0 0.0 NaN NaN NaN NaN 0.0 NaN
44 2013-04-10 W1F0QVW8 ST3000DM001 3000592982016 461 0 0.0 NaN NaN NaN NaN 0.0 NaN
45 2013-04-10 W1F04LH8 ST3000DM001 3000592982016 461 0 0.0 NaN NaN NaN NaN 0.0 NaN
46 2013-04-10 W1F0A61Y ST3000DM001 3000592982016 461 0 0.0 NaN NaN NaN NaN 0.0 NaN
47 2013-04-10 S1F032SM ST3000DM001 3000592982016 516 0 0.0 NaN NaN NaN NaN 0.0 NaN
48 2013-04-10 W1F0JCG9 ST3000DM001 3000592982016 621 0 0.0 NaN NaN NaN NaN 0.0 NaN
49 2013-04-10 Z1F0LDRK ST3000DM001 3000592982016 456 0 0.0 NaN NaN NaN NaN 0.0 NaN
... ... ... ... ... ... ... ... ... ... ... ... ... ...
15617977 2023-03-31 PL1331LAGRHDZH HGST HMS5C4040BLE640 4000787030016 0 0 0.0 NaN NaN NaN 0.0 0.0 0.0
15617978 2023-03-31 PL1331LAGS58GH HGST HMS5C4040BLE640 4000787030016 0 0 0.0 NaN NaN NaN 0.0 0.0 0.0
15617979 2023-03-31 PL1331LAGTW0KH HGST HMS5C4040ALE640 4000787030016 0 0 0.0 NaN NaN NaN 0.0 0.0 0.0
15617980 2023-03-31 PL1331LAHBN14H HGST HMS5C4040BLE640 4000787030016 0 0 0.0 NaN NaN NaN 0.0 0.0 0.0
15617981 2023-03-31 PL1331LAHGUAAH HGST HMS5C4040ALE640 4000787030016 0 0 1.0 NaN NaN NaN 1.0 0.0 0.0
15617982 2023-03-31 PL2331LAGSTGLJ HGST HMS5C4040BLE640 4000787030016 0 0 0.0 NaN NaN NaN 0.0 0.0 0.0
15617983 2023-03-31 PL2331LAGSU5RJ HGST HMS5C4040BLE640 4000787030016 0 0 0.0 NaN NaN NaN 0.0 0.0 0.0
15617984 2023-03-31 PL1331LAHEB2LH HGST HMS5C4040ALE640 4000787030016 0 0 0.0 NaN NaN NaN 0.0 0.0 0.0
15617985 2023-03-31 ZA189X28 ST8000NM0055 8001563222016 0 0 0.0 0.0 0.000000e+00 0.0 NaN 0.0 0.0
15617986 2023-03-31 ZA14LWDQ ST8000NM0055 8001563222016 0 0 0.0 0.0 8.590066e+09 0.0 NaN 0.0 0.0
15617987 2023-03-31 ZA16Z98M ST8000NM0055 8001563222016 0 0 0.0 6.0 0.000000e+00 0.0 NaN 8.0 8.0
15617988 2023-03-31 ZA180YVS ST8000NM0055 8001563222016 0 0 0.0 0.0 0.000000e+00 0.0 NaN 0.0 0.0
15617989 2023-03-31 ZA1GA3V4 ST8000NM0055 8001563222016 0 0 0.0 0.0 0.000000e+00 0.0 NaN 0.0 0.0
15617990 2023-03-31 ZA13QBK3 ST8000NM0055 8001563222016 0 0 0.0 0.0 0.000000e+00 0.0 NaN 0.0 0.0
15617991 2023-03-31 ZA180Y97 ST8000NM0055 8001563222016 0 0 0.0 0.0 0.000000e+00 0.0 NaN 0.0 0.0
15617992 2023-03-31 ZA1814NK ST8000NM0055 8001563222016 0 0 0.0 0.0 0.000000e+00 0.0 NaN 0.0 0.0
15617993 2023-03-31 96ICTBQ9T TOSHIBA MQ01ABF050 500107862016 0 0 0.0 NaN NaN NaN 0.0 0.0 0.0
15617994 2023-03-31 5PHLEMAD HGST HUH721212ALE604 12000138625024 0 0 0.0 NaN NaN NaN 0.0 0.0 0.0
15617995 2023-03-31 ZL2300QV ST16000NM001G 16000900661248 0 0 0.0 0.0 0.000000e+00 NaN NaN 0.0 0.0
15617996 2023-03-31 ZA189K74 ST8000NM0055 8001563222016 0 0 24.0 0.0 0.000000e+00 0.0 NaN 0.0 0.0
15617997 2023-03-31 WD-WXU1E83UUNF8 WDC WD5000LPVX 500107862016 0 0 0.0 NaN NaN NaN 0.0 0.0 0.0
15617998 2023-03-31 WD-WX21A35A9U5H WDC WD5000LPVX 500107862016 0 0 0.0 NaN NaN NaN 0.0 0.0 0.0
15617999 2023-03-31 ZA21HD7E ST10000NM0086 10000831348736 0 0 243.0 4.0 3.000000e+00 93.0 NaN 0.0 0.0
15618000 2023-03-31 ZS5064WT ST10000NM001G 10000831348736 0 0 0.0 0.0 0.000000e+00 NaN NaN 0.0 0.0
15618001 2023-03-31 AAG6E4KH HGST HUH721212ALN604 12000138625024 0 0 0.0 NaN NaN NaN 0.0 0.0 0.0
15618002 2023-03-31 5090A07UF97G TOSHIBA MG07ACA14TA 14000519643136 0 0 0.0 NaN NaN NaN 0.0 0.0 0.0
15618003 2023-03-31 7030A02SF97G TOSHIBA MG07ACA14TA 14000519643136 0 0 0.0 NaN NaN NaN 0.0 0.0 0.0
15618004 2023-03-31 PL1331LAHD1K7H HGST HMS5C4040BLE640 4000787030016 0 0 0.0 NaN NaN NaN 0.0 0.0 0.0
15618005 2023-03-31 VKGJ24RX HGST HUH728080ALE600 8001563222016 0 0 0.0 NaN NaN NaN 0.0 0.0 0.0
15618006 2023-03-31 VKGJMK1X HGST HUH728080ALE600 8001563222016 0 0 0.0 NaN NaN NaN 0.0 0.0 4.0
15618007 2023-03-31 S2ZYJ9CF715326 ST500LM012 HN 500107862016 0 0 0.0 NaN NaN NaN 0.0 0.0 0.0
15618008 2023-03-31 ZL005CL2 ST12000NM0008 12000138625024 0 0 0.0 0.0 0.000000e+00 NaN NaN 0.0 0.0
15618009 2023-03-31 ZL005D18 ST12000NM0008 12000138625024 0 0 0.0 0.0 0.000000e+00 NaN NaN 0.0 0.0
15618010 2023-03-31 70F0A0B4F97G TOSHIBA MG07ACA14TA 14000519643136 0 0 0.0 NaN NaN NaN 0.0 0.0 0.0
15618011 2023-03-31 S2ZYJ9GF302327 ST500LM012 HN 500107862016 0 0 0.0 NaN NaN NaN 0.0 0.0 0.0
15618012 2023-03-31 WD-WX61A24V0027 WDC WD5000LPVX 500107862016 0 0 0.0 NaN NaN NaN 0.0 0.0 0.0
15618013 2023-03-31 S2ZYJ9AFA09524 ST500LM012 HN 500107862016 0 0 0.0 NaN NaN NaN 0.0 0.0 0.0
15618014 2023-03-31 ZDEC8Y2P ST500LM030 500107862016 0 0 0.0 0.0 6.872053e+10 0.0 NaN 0.0 0.0
15618015 2023-03-31 20W0A0RGF97G TOSHIBA MG07ACA14TA 14000519643136 0 0 0.0 NaN NaN NaN 0.0 3.0 1.0
15618016 2023-03-31 ZDEB35TN ST500LM030 500107862016 0 0 0.0 0.0 1.718013e+10 0.0 NaN 0.0 0.0
15618017 2023-03-31 ZDEB33CV ST500LM030 500107862016 0 0 0.0 0.0 2.147516e+10 0.0 NaN 0.0 0.0
15618018 2023-03-31 ZDEB3375 ST500LM030 500107862016 0 0 0.0 0.0 3.865530e+10 0.0 NaN 0.0 0.0
15618019 2023-03-31 ZDEB35E7 ST500LM030 500107862016 0 0 0.0 0.0 2.147516e+10 0.0 NaN 0.0 0.0
15618020 2023-03-31 ZDEABEPN ST500LM030 500107862016 0 0 0.0 0.0 4.295033e+09 0.0 NaN 0.0 0.0
15618021 2023-03-31 ZCH0CAKT ST12000NM0007 12000138625024 0 0 0.0 0.0 4.295033e+09 NaN NaN 0.0 0.0
15618022 2023-03-31 ZCH06VE2 ST12000NM0007 12000138625024 0 0 0.0 1.0 0.000000e+00 NaN NaN 0.0 0.0
15618023 2023-03-31 X8L0A01BF97G TOSHIBA MG07ACA14TA 14000519643136 0 0 0.0 NaN NaN NaN 0.0 0.0 0.0
15618024 2023-03-31 9JG4657T WDC WUH721414ALE6L4 14000519643136 0 0 0.0 NaN NaN NaN 0.0 0.0 0.0
15618025 2023-03-31 6090A00RFVKG TOSHIBA MG08ACA16TA 16000900661248 0 0 0.0 NaN NaN NaN 0.0 0.0 0.0
15618026 2023-03-31 51R0A2Q8FVGG TOSHIBA MG08ACA16TE 16000900661248 0 0 0.0 NaN NaN NaN 0.0 0.0 0.0

15618027 rows × 13 columns

In [50]:
df.fillna(0, inplace = True)
df.tail(10)
Out[50]:
date serial_number model capacity_bytes days_to_failure failure smart_5_raw smart_187_raw smart_188_raw smart_189_raw smart_196_raw smart_197_raw smart_198_raw
15618017 2023-03-31 ZDEB33CV ST500LM030 500107862016 0 0 0.0 0.0 2.147516e+10 0.0 0.0 0.0 0.0
15618018 2023-03-31 ZDEB3375 ST500LM030 500107862016 0 0 0.0 0.0 3.865530e+10 0.0 0.0 0.0 0.0
15618019 2023-03-31 ZDEB35E7 ST500LM030 500107862016 0 0 0.0 0.0 2.147516e+10 0.0 0.0 0.0 0.0
15618020 2023-03-31 ZDEABEPN ST500LM030 500107862016 0 0 0.0 0.0 4.295033e+09 0.0 0.0 0.0 0.0
15618021 2023-03-31 ZCH0CAKT ST12000NM0007 12000138625024 0 0 0.0 0.0 4.295033e+09 0.0 0.0 0.0 0.0
15618022 2023-03-31 ZCH06VE2 ST12000NM0007 12000138625024 0 0 0.0 1.0 0.000000e+00 0.0 0.0 0.0 0.0
15618023 2023-03-31 X8L0A01BF97G TOSHIBA MG07ACA14TA 14000519643136 0 0 0.0 0.0 0.000000e+00 0.0 0.0 0.0 0.0
15618024 2023-03-31 9JG4657T WDC WUH721414ALE6L4 14000519643136 0 0 0.0 0.0 0.000000e+00 0.0 0.0 0.0 0.0
15618025 2023-03-31 6090A00RFVKG TOSHIBA MG08ACA16TA 16000900661248 0 0 0.0 0.0 0.000000e+00 0.0 0.0 0.0 0.0
15618026 2023-03-31 51R0A2Q8FVGG TOSHIBA MG08ACA16TE 16000900661248 0 0 0.0 0.0 0.000000e+00 0.0 0.0 0.0 0.0
In [51]:
df.columns
Out[51]:
Index(['date', 'serial_number', 'model', 'capacity_bytes', 'days_to_failure',
       'failure', 'smart_5_raw', 'smart_187_raw', 'smart_188_raw',
       'smart_189_raw', 'smart_196_raw', 'smart_197_raw', 'smart_198_raw'],
      dtype='object')
In [52]:
import numpy as np

from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler(feature_range = (-1,1))
df[['smart_5_raw',
       'smart_187_raw', 'smart_188_raw', 'smart_189_raw', 'smart_196_raw',
       'smart_197_raw', 'smart_198_raw']] = scaler.fit_transform(df[['smart_5_raw',
       'smart_187_raw', 'smart_188_raw', 'smart_189_raw', 'smart_196_raw',
       'smart_197_raw', 'smart_198_raw']])

#df[['smart_1_raw','smart_2_raw','smart_3_raw','smart_4_raw','smart_5_raw','smart_7_raw','smart_8_raw','smart_9_raw','smart_10_raw','smart_11_raw','smart_12_raw','smart_13_raw','smart_15_raw','smart_16_raw','smart_17_raw','smart_18_raw','smart_22_raw','smart_23_raw','smart_24_raw','smart_160_raw','smart_161_raw','smart_163_raw','smart_164_raw','smart_165_raw','smart_166_raw','smart_167_raw','smart_168_raw','smart_169_raw','smart_170_raw','smart_171_raw','smart_172_raw','smart_173_raw','smart_174_raw','smart_175_raw','smart_176_raw','smart_177_raw','smart_178_raw','smart_179_raw','smart_180_raw','smart_181_raw','smart_182_raw','smart_183_raw','smart_184_raw','smart_187_raw','smart_188_raw','smart_189_raw','smart_190_raw','smart_191_raw','smart_192_raw','smart_193_raw','smart_194_raw','smart_195_raw','smart_196_raw','smart_197_raw','smart_198_raw','smart_199_raw','smart_200_raw','smart_201_raw','smart_202_raw','smart_206_raw','smart_210_raw','smart_218_raw','smart_220_raw','smart_222_raw','smart_223_raw','smart_224_raw','smart_225_raw','smart_226_raw','smart_230_raw','smart_231_raw','smart_232_raw','smart_233_raw','smart_234_raw','smart_235_raw','smart_240_raw','smart_241_raw','smart_242_raw','smart_244_raw','smart_245_raw','smart_246_raw','smart_247_raw','smart_248_raw','smart_250_raw','smart_251_raw','smart_252_raw','smart_254_raw','smart_255_raw']] = scaler.fit_transform(df[['smart_1_raw','smart_2_raw','smart_3_raw','smart_4_raw','smart_5_raw','smart_7_raw','smart_8_raw','smart_9_raw','smart_10_raw','smart_11_raw','smart_12_raw','smart_13_raw','smart_15_raw','smart_16_raw','smart_17_raw','smart_18_raw','smart_22_raw','smart_23_raw','smart_24_raw','smart_160_raw','smart_161_raw','smart_163_raw','smart_164_raw','smart_165_raw','smart_166_raw','smart_167_raw','smart_168_raw','smart_169_raw','smart_170_raw','smart_171_raw','smart_172_raw','smart_173_raw','smart_174_raw','smart_175_raw','smart_176_raw','smart_177_raw','smart_178_raw','smart_179_raw','smart_180_raw','smart_181_raw','smart_182_raw','smart_183_raw','smart_184_raw','smart_187_raw','smart_188_raw','smart_189_raw','smart_190_raw','smart_191_raw','smart_192_raw','smart_193_raw','smart_194_raw','smart_195_raw','smart_196_raw','smart_197_raw','smart_198_raw','smart_199_raw','smart_200_raw','smart_201_raw','smart_202_raw','smart_206_raw','smart_210_raw','smart_218_raw','smart_220_raw','smart_222_raw','smart_223_raw','smart_224_raw','smart_225_raw','smart_226_raw','smart_230_raw','smart_231_raw','smart_232_raw','smart_233_raw','smart_234_raw','smart_235_raw','smart_240_raw','smart_241_raw','smart_242_raw','smart_244_raw','smart_245_raw','smart_246_raw','smart_247_raw','smart_248_raw','smart_250_raw','smart_251_raw','smart_252_raw','smart_254_raw','smart_255_raw']])
#df

df
Out[52]:
date serial_number model capacity_bytes days_to_failure failure smart_5_raw smart_187_raw smart_188_raw smart_189_raw smart_196_raw smart_197_raw smart_198_raw
0 2013-04-10 W1F0LRXG ST3000DM001 3000592982016 460 0 -1.000000 -1.000000 -1.000000 -1.000000 -1.000000 -0.999965 -1.000000
1 2013-04-10 W1F09LPX ST3000DM001 3000592982016 330 0 -1.000000 -1.000000 -1.000000 -1.000000 -1.000000 -1.000000 -1.000000
2 2013-04-10 W1F08WP1 ST3000DM001 3000592982016 546 0 -1.000000 -1.000000 -1.000000 -1.000000 -1.000000 -1.000000 -1.000000
3 2013-04-10 9XW01ZT9 ST31500541AS 1500301910016 882 0 -1.000000 -1.000000 -1.000000 -1.000000 -1.000000 -1.000000 -1.000000
4 2013-04-10 9VS3JYRG ST31500341AS 1500301910016 343 0 -0.999939 -1.000000 -1.000000 -1.000000 -1.000000 -1.000000 -1.000000
5 2013-04-10 MJ1311YNG36USA Hitachi HDS5C3030ALA630 3000592982016 1316 0 -0.998077 -1.000000 -1.000000 -1.000000 -1.000000 -1.000000 -1.000000
6 2013-04-10 WD-WMC1T1203338 WDC WD30EFRX 3000592982016 1049 0 -1.000000 -1.000000 -1.000000 -1.000000 -1.000000 -1.000000 -1.000000
7 2013-04-10 6XW05CJ6 ST31500541AS 1500301910016 99 0 -0.999237 -1.000000 -1.000000 -1.000000 -1.000000 -1.000000 -1.000000
8 2013-04-10 9VS3HR6E ST31500341AS 1500301910016 754 0 -0.999115 -1.000000 -1.000000 -1.000000 -1.000000 -1.000000 -1.000000
9 2013-04-10 5XW0CBN7 ST32000542AS 2000398934016 42 0 -0.999267 -1.000000 -1.000000 -1.000000 -1.000000 -1.000000 -1.000000
10 2013-04-10 W1F03CMF ST3000DM001 3000592982016 356 0 -1.000000 -1.000000 -1.000000 -1.000000 -1.000000 -1.000000 -1.000000
11 2013-04-10 MJ1311YNG733NA Hitachi HDS5C3030ALA630 3000592982016 1165 0 -1.000000 -1.000000 -1.000000 -1.000000 -1.000000 -1.000000 -1.000000
12 2013-04-10 S1F091BQ ST3000DM001 3000592982016 478 0 -1.000000 -1.000000 -1.000000 -1.000000 -1.000000 -1.000000 -1.000000
13 2013-04-10 S1F0KZVZ ST3000DM001 3000592982016 474 0 -1.000000 -1.000000 -1.000000 -1.000000 -1.000000 -1.000000 -1.000000
14 2013-04-10 9XW02HF6 ST31500541AS 1500301910016 622 0 -1.000000 -1.000000 -1.000000 -1.000000 -1.000000 -1.000000 -1.000000
15 2013-04-10 S1F03G0F ST3000DM001 3000592982016 421 0 -1.000000 -1.000000 -1.000000 -1.000000 -1.000000 -1.000000 -1.000000
16 2013-04-10 S1F059PD ST3000DM001 3000592982016 463 0 -1.000000 -1.000000 -1.000000 -1.000000 -1.000000 -1.000000 -1.000000
17 2013-04-10 WD-WCAV5M690585 WDC WD10EADS 1000204886016 734 0 -1.000000 -1.000000 -1.000000 -1.000000 -1.000000 -1.000000 -1.000000
18 2013-04-10 S1F06HX8 ST3000DM001 3000592982016 447 0 -1.000000 -1.000000 -1.000000 -1.000000 -1.000000 -1.000000 -1.000000
19 2013-04-10 6XW0DP29 ST31500541AS 1500301910016 288 0 -1.000000 -1.000000 -1.000000 -1.000000 -1.000000 -1.000000 -1.000000
20 2013-04-10 9VS1W7EN ST31500341AS 1500301910016 43 0 -0.984587 -1.000000 -1.000000 -1.000000 -1.000000 -1.000000 -1.000000
21 2013-04-10 MJ1323YNG1PE0C Hitachi HDS5C3030ALA630 3000592982016 921 0 -1.000000 -1.000000 -1.000000 -1.000000 -1.000000 -1.000000 -1.000000
22 2013-04-10 WD-WCAU44965108 WDC WD10EADS 1000204886016 27 0 -1.000000 -1.000000 -1.000000 -1.000000 -1.000000 -1.000000 -1.000000
23 2013-04-10 W1F09Z2W ST3000DM001 3000592982016 352 0 -1.000000 -1.000000 -1.000000 -1.000000 -1.000000 -1.000000 -1.000000
24 2013-04-10 W1F0918M ST3000DM001 3000592982016 267 0 -1.000000 -1.000000 -1.000000 -1.000000 -1.000000 -1.000000 -1.000000
25 2013-04-10 W1F0L2EW ST3000DM001 3000592982016 428 0 -1.000000 -1.000000 -1.000000 -1.000000 -1.000000 -1.000000 -1.000000
26 2013-04-10 S1F0CSW2 ST3000DM001 3000592982016 554 0 -1.000000 -1.000000 -1.000000 -1.000000 -1.000000 -1.000000 -1.000000
27 2013-04-10 S1F0CSW4 ST3000DM001 3000592982016 51 0 -1.000000 -1.000000 -1.000000 -1.000000 -1.000000 -1.000000 -1.000000
28 2013-04-10 6XW01RJV ST31500541AS 1500301910016 523 0 -1.000000 -1.000000 -1.000000 -1.000000 -1.000000 -1.000000 -1.000000
29 2013-04-10 Z1F11NTL ST3000DM001 3000592982016 540 0 -1.000000 -1.000000 -1.000000 -1.000000 -1.000000 -1.000000 -1.000000
30 2013-04-10 Z1Z03G7Q ST4000DX000 4000787030016 1152 0 -1.000000 -1.000000 -1.000000 -1.000000 -1.000000 -1.000000 -1.000000
31 2013-04-10 MK0311YHGK1G7A Hitachi HDS723030ALA640 3000592982016 440 0 -1.000000 -1.000000 -1.000000 -1.000000 -1.000000 -1.000000 -1.000000
32 2013-04-10 S1F0A743 ST3000DM001 3000592982016 460 0 -1.000000 -1.000000 -1.000000 -1.000000 -1.000000 -1.000000 -1.000000
33 2013-04-10 S1F04CTZ ST3000DM001 3000592982016 470 0 -1.000000 -1.000000 -1.000000 -1.000000 -1.000000 -1.000000 -1.000000
34 2013-04-10 5XW0M6QE ST32000542AS 2000398934016 188 0 -1.000000 -1.000000 -1.000000 -1.000000 -1.000000 -1.000000 -1.000000
35 2013-04-10 W1F0AZTN ST3000DM001 3000592982016 444 0 -1.000000 -1.000000 -1.000000 -1.000000 -1.000000 -1.000000 -1.000000
36 2013-04-10 S1F05QHK ST3000DM001 3000592982016 253 0 -1.000000 -1.000000 -1.000000 -1.000000 -1.000000 -0.999965 -1.000000
37 2013-04-10 W1F0B4S8 ST3000DM001 3000592982016 345 0 -1.000000 -1.000000 -1.000000 -1.000000 -1.000000 -1.000000 -1.000000
38 2013-04-10 S1F01J8B ST3000DM001 3000592982016 516 0 -1.000000 -1.000000 -1.000000 -1.000000 -1.000000 -1.000000 -1.000000
39 2013-04-10 S1F00MNJ ST3000DM001 3000592982016 377 0 -1.000000 -1.000000 -1.000000 -1.000000 -1.000000 -1.000000 -1.000000
40 2013-04-10 MN3221F30DB21E Hitachi HDS723020BLA642 2000398934016 740 0 -0.999969 -1.000000 -1.000000 -1.000000 -1.000000 -1.000000 -1.000000
41 2013-04-10 S1F01VER ST3000DM001 3000592982016 495 0 -1.000000 -1.000000 -1.000000 -1.000000 -1.000000 -1.000000 -1.000000
42 2013-04-10 W1F0VZRX ST3000DM001 3000592982016 490 0 -1.000000 -1.000000 -1.000000 -1.000000 -1.000000 -1.000000 -1.000000
43 2013-04-10 W1F0A479 ST3000DM001 3000592982016 474 0 -1.000000 -1.000000 -1.000000 -1.000000 -1.000000 -1.000000 -1.000000
44 2013-04-10 W1F0QVW8 ST3000DM001 3000592982016 461 0 -1.000000 -1.000000 -1.000000 -1.000000 -1.000000 -1.000000 -1.000000
45 2013-04-10 W1F04LH8 ST3000DM001 3000592982016 461 0 -1.000000 -1.000000 -1.000000 -1.000000 -1.000000 -1.000000 -1.000000
46 2013-04-10 W1F0A61Y ST3000DM001 3000592982016 461 0 -1.000000 -1.000000 -1.000000 -1.000000 -1.000000 -1.000000 -1.000000
47 2013-04-10 S1F032SM ST3000DM001 3000592982016 516 0 -1.000000 -1.000000 -1.000000 -1.000000 -1.000000 -1.000000 -1.000000
48 2013-04-10 W1F0JCG9 ST3000DM001 3000592982016 621 0 -1.000000 -1.000000 -1.000000 -1.000000 -1.000000 -1.000000 -1.000000
49 2013-04-10 Z1F0LDRK ST3000DM001 3000592982016 456 0 -1.000000 -1.000000 -1.000000 -1.000000 -1.000000 -1.000000 -1.000000
... ... ... ... ... ... ... ... ... ... ... ... ... ...
15617977 2023-03-31 PL1331LAGRHDZH HGST HMS5C4040BLE640 4000787030016 0 0 -1.000000 -1.000000 -1.000000 -1.000000 -1.000000 -1.000000 -1.000000
15617978 2023-03-31 PL1331LAGS58GH HGST HMS5C4040BLE640 4000787030016 0 0 -1.000000 -1.000000 -1.000000 -1.000000 -1.000000 -1.000000 -1.000000
15617979 2023-03-31 PL1331LAGTW0KH HGST HMS5C4040ALE640 4000787030016 0 0 -1.000000 -1.000000 -1.000000 -1.000000 -1.000000 -1.000000 -1.000000
15617980 2023-03-31 PL1331LAHBN14H HGST HMS5C4040BLE640 4000787030016 0 0 -1.000000 -1.000000 -1.000000 -1.000000 -1.000000 -1.000000 -1.000000
15617981 2023-03-31 PL1331LAHGUAAH HGST HMS5C4040ALE640 4000787030016 0 0 -0.999969 -1.000000 -1.000000 -1.000000 -0.999781 -1.000000 -1.000000
15617982 2023-03-31 PL2331LAGSTGLJ HGST HMS5C4040BLE640 4000787030016 0 0 -1.000000 -1.000000 -1.000000 -1.000000 -1.000000 -1.000000 -1.000000
15617983 2023-03-31 PL2331LAGSU5RJ HGST HMS5C4040BLE640 4000787030016 0 0 -1.000000 -1.000000 -1.000000 -1.000000 -1.000000 -1.000000 -1.000000
15617984 2023-03-31 PL1331LAHEB2LH HGST HMS5C4040ALE640 4000787030016 0 0 -1.000000 -1.000000 -1.000000 -1.000000 -1.000000 -1.000000 -1.000000
15617985 2023-03-31 ZA189X28 ST8000NM0055 8001563222016 0 0 -1.000000 -1.000000 -1.000000 -1.000000 -1.000000 -1.000000 -1.000000
15617986 2023-03-31 ZA14LWDQ ST8000NM0055 8001563222016 0 0 -1.000000 -1.000000 -0.998315 -1.000000 -1.000000 -1.000000 -1.000000
15617987 2023-03-31 ZA16Z98M ST8000NM0055 8001563222016 0 0 -1.000000 -0.999817 -1.000000 -1.000000 -1.000000 -0.999965 -0.999965
15617988 2023-03-31 ZA180YVS ST8000NM0055 8001563222016 0 0 -1.000000 -1.000000 -1.000000 -1.000000 -1.000000 -1.000000 -1.000000
15617989 2023-03-31 ZA1GA3V4 ST8000NM0055 8001563222016 0 0 -1.000000 -1.000000 -1.000000 -1.000000 -1.000000 -1.000000 -1.000000
15617990 2023-03-31 ZA13QBK3 ST8000NM0055 8001563222016 0 0 -1.000000 -1.000000 -1.000000 -1.000000 -1.000000 -1.000000 -1.000000
15617991 2023-03-31 ZA180Y97 ST8000NM0055 8001563222016 0 0 -1.000000 -1.000000 -1.000000 -1.000000 -1.000000 -1.000000 -1.000000
15617992 2023-03-31 ZA1814NK ST8000NM0055 8001563222016 0 0 -1.000000 -1.000000 -1.000000 -1.000000 -1.000000 -1.000000 -1.000000
15617993 2023-03-31 96ICTBQ9T TOSHIBA MQ01ABF050 500107862016 0 0 -1.000000 -1.000000 -1.000000 -1.000000 -1.000000 -1.000000 -1.000000
15617994 2023-03-31 5PHLEMAD HGST HUH721212ALE604 12000138625024 0 0 -1.000000 -1.000000 -1.000000 -1.000000 -1.000000 -1.000000 -1.000000
15617995 2023-03-31 ZL2300QV ST16000NM001G 16000900661248 0 0 -1.000000 -1.000000 -1.000000 -1.000000 -1.000000 -1.000000 -1.000000
15617996 2023-03-31 ZA189K74 ST8000NM0055 8001563222016 0 0 -0.999267 -1.000000 -1.000000 -1.000000 -1.000000 -1.000000 -1.000000
15617997 2023-03-31 WD-WXU1E83UUNF8 WDC WD5000LPVX 500107862016 0 0 -1.000000 -1.000000 -1.000000 -1.000000 -1.000000 -1.000000 -1.000000
15617998 2023-03-31 WD-WX21A35A9U5H WDC WD5000LPVX 500107862016 0 0 -1.000000 -1.000000 -1.000000 -1.000000 -1.000000 -1.000000 -1.000000
15617999 2023-03-31 ZA21HD7E ST10000NM0086 10000831348736 0 0 -0.992583 -0.999878 -1.000000 -0.997162 -1.000000 -1.000000 -1.000000
15618000 2023-03-31 ZS5064WT ST10000NM001G 10000831348736 0 0 -1.000000 -1.000000 -1.000000 -1.000000 -1.000000 -1.000000 -1.000000
15618001 2023-03-31 AAG6E4KH HGST HUH721212ALN604 12000138625024 0 0 -1.000000 -1.000000 -1.000000 -1.000000 -1.000000 -1.000000 -1.000000
15618002 2023-03-31 5090A07UF97G TOSHIBA MG07ACA14TA 14000519643136 0 0 -1.000000 -1.000000 -1.000000 -1.000000 -1.000000 -1.000000 -1.000000
15618003 2023-03-31 7030A02SF97G TOSHIBA MG07ACA14TA 14000519643136 0 0 -1.000000 -1.000000 -1.000000 -1.000000 -1.000000 -1.000000 -1.000000
15618004 2023-03-31 PL1331LAHD1K7H HGST HMS5C4040BLE640 4000787030016 0 0 -1.000000 -1.000000 -1.000000 -1.000000 -1.000000 -1.000000 -1.000000
15618005 2023-03-31 VKGJ24RX HGST HUH728080ALE600 8001563222016 0 0 -1.000000 -1.000000 -1.000000 -1.000000 -1.000000 -1.000000 -1.000000
15618006 2023-03-31 VKGJMK1X HGST HUH728080ALE600 8001563222016 0 0 -1.000000 -1.000000 -1.000000 -1.000000 -1.000000 -1.000000 -0.999983
15618007 2023-03-31 S2ZYJ9CF715326 ST500LM012 HN 500107862016 0 0 -1.000000 -1.000000 -1.000000 -1.000000 -1.000000 -1.000000 -1.000000
15618008 2023-03-31 ZL005CL2 ST12000NM0008 12000138625024 0 0 -1.000000 -1.000000 -1.000000 -1.000000 -1.000000 -1.000000 -1.000000
15618009 2023-03-31 ZL005D18 ST12000NM0008 12000138625024 0 0 -1.000000 -1.000000 -1.000000 -1.000000 -1.000000 -1.000000 -1.000000
15618010 2023-03-31 70F0A0B4F97G TOSHIBA MG07ACA14TA 14000519643136 0 0 -1.000000 -1.000000 -1.000000 -1.000000 -1.000000 -1.000000 -1.000000
15618011 2023-03-31 S2ZYJ9GF302327 ST500LM012 HN 500107862016 0 0 -1.000000 -1.000000 -1.000000 -1.000000 -1.000000 -1.000000 -1.000000
15618012 2023-03-31 WD-WX61A24V0027 WDC WD5000LPVX 500107862016 0 0 -1.000000 -1.000000 -1.000000 -1.000000 -1.000000 -1.000000 -1.000000
15618013 2023-03-31 S2ZYJ9AFA09524 ST500LM012 HN 500107862016 0 0 -1.000000 -1.000000 -1.000000 -1.000000 -1.000000 -1.000000 -1.000000
15618014 2023-03-31 ZDEC8Y2P ST500LM030 500107862016 0 0 -1.000000 -1.000000 -0.986521 -1.000000 -1.000000 -1.000000 -1.000000
15618015 2023-03-31 20W0A0RGF97G TOSHIBA MG07ACA14TA 14000519643136 0 0 -1.000000 -1.000000 -1.000000 -1.000000 -1.000000 -0.999987 -0.999996
15618016 2023-03-31 ZDEB35TN ST500LM030 500107862016 0 0 -1.000000 -1.000000 -0.996630 -1.000000 -1.000000 -1.000000 -1.000000
15618017 2023-03-31 ZDEB33CV ST500LM030 500107862016 0 0 -1.000000 -1.000000 -0.995788 -1.000000 -1.000000 -1.000000 -1.000000
15618018 2023-03-31 ZDEB3375 ST500LM030 500107862016 0 0 -1.000000 -1.000000 -0.992418 -1.000000 -1.000000 -1.000000 -1.000000
15618019 2023-03-31 ZDEB35E7 ST500LM030 500107862016 0 0 -1.000000 -1.000000 -0.995788 -1.000000 -1.000000 -1.000000 -1.000000
15618020 2023-03-31 ZDEABEPN ST500LM030 500107862016 0 0 -1.000000 -1.000000 -0.999158 -1.000000 -1.000000 -1.000000 -1.000000
15618021 2023-03-31 ZCH0CAKT ST12000NM0007 12000138625024 0 0 -1.000000 -1.000000 -0.999158 -1.000000 -1.000000 -1.000000 -1.000000
15618022 2023-03-31 ZCH06VE2 ST12000NM0007 12000138625024 0 0 -1.000000 -0.999969 -1.000000 -1.000000 -1.000000 -1.000000 -1.000000
15618023 2023-03-31 X8L0A01BF97G TOSHIBA MG07ACA14TA 14000519643136 0 0 -1.000000 -1.000000 -1.000000 -1.000000 -1.000000 -1.000000 -1.000000
15618024 2023-03-31 9JG4657T WDC WUH721414ALE6L4 14000519643136 0 0 -1.000000 -1.000000 -1.000000 -1.000000 -1.000000 -1.000000 -1.000000
15618025 2023-03-31 6090A00RFVKG TOSHIBA MG08ACA16TA 16000900661248 0 0 -1.000000 -1.000000 -1.000000 -1.000000 -1.000000 -1.000000 -1.000000
15618026 2023-03-31 51R0A2Q8FVGG TOSHIBA MG08ACA16TE 16000900661248 0 0 -1.000000 -1.000000 -1.000000 -1.000000 -1.000000 -1.000000 -1.000000

15618027 rows × 13 columns

In [53]:
df.columns
Out[53]:
Index(['date', 'serial_number', 'model', 'capacity_bytes', 'days_to_failure',
       'failure', 'smart_5_raw', 'smart_187_raw', 'smart_188_raw',
       'smart_189_raw', 'smart_196_raw', 'smart_197_raw', 'smart_198_raw'],
      dtype='object')
In [54]:
#heatmap

import seaborn as sns
import matplotlib.pyplot as plt

cr=df[['smart_5_raw',
       'smart_187_raw', 'smart_188_raw', 'smart_189_raw', 'smart_196_raw',
       'smart_197_raw', 'smart_198_raw','failure']].corr()

#colormap = plt.cm.RdBu
plt.figure(figsize=(15,15))
plt.title('Correlation matrix', y=1.05, size=16)

mask = np.zeros_like(cr)
mask[np.triu_indices_from(mask)] = True

#heatm = sns.heatmap(cr, mask = mask, linewidths=0.1,vmax=1.0, square=True, cmap=colormap, linecolor='white', annot=True)
heatm = sns.heatmap(cr, mask = mask, annot=True, cmap="Blues", fmt='.1g')
In [55]:
plt.show()

As we can see from the heatmap above there is strong correlation between smart_198_raw and smart_197_raw Backblaze have chosen both for their prediction model as not all disk manufacturers report both however for my case - the general model (for HDDs in the BB dataset) - I will select only one because all selected disk models report both of them and I'll chose the one with most entries in the database -> smart_197_raw (197 has 15602408 reported entries / 198 has 14792555 reported entries)

Final cleanup stage

mysql> select count(smart_197_raw) from drive_stats ;
 +----------------------+
| count(smart_197_raw) |
+----------------------+
|             15602408 |
+----------------------+
1 row in set (43.70 sec)
mysql> select count(smart_198_raw) from drive_stats ;
+----------------------+
| count(smart_198_raw) |
+----------------------+
|             14792555 |
+----------------------+
1 row in set (41.92 sec)
mysql> alter table drive_stats drop column smart_198_raw;
Query OK, 15618027 rows affected (11 min 54.73 sec)
Records: 15618027  Duplicates: 0  Warnings: 0

mysql> describe drive_stats;
+-----------------+-------------+------+-----+---------+-------+
| Field           | Type        | Null | Key | Default | Extra |
+-----------------+-------------+------+-----+---------+-------+
| date            | date        | YES  | MUL | NULL    |       |
| serial_number   | varchar(64) | YES  | MUL | NULL    |       |
| model           | varchar(64) | YES  | MUL | NULL    |       |
| capacity_bytes  | bigint      | YES  | MUL | NULL    |       |
| days_to_failure | int         | YES  |     | NULL    |       |
| failure         | int         | YES  | MUL | NULL    |       |
| smart_5_raw     | bigint      | YES  |     | NULL    |       |
| smart_187_raw   | bigint      | YES  |     | NULL    |       |
| smart_188_raw   | bigint      | YES  |     | NULL    |       |
| smart_189_raw   | bigint      | YES  |     | NULL    |       |
| smart_196_raw   | bigint      | YES  |     | NULL    |       |
| smart_197_raw   | bigint      | YES  |     | NULL    |       |
+-----------------+-------------+------+-----+---------+-------+
12 rows in set (0.02 sec)