1+ # Source: https://www.kaggle.com/juejuewang/handle-missing-values-in-time-series-for-beginners
2+ # Author: Nishant Singh - https://www.datacamp.com/profile/NishantKumarSingh
3+
4+ library(imputeTS )
5+
# Quick look at the example series before any imputation is applied.

# View() opens a data-viewer window, so only call it when a user is present;
# this keeps the script runnable in batch mode (e.g. via Rscript).
if (interactive()) {
  View(tsAirgap)
}

# Plot the series that still contains the gaps.
plot(tsAirgap, main = " AirPassenger data with missing values")

# Print summary statistics about the missing values in the series.
statsNA(tsAirgap)
11+
12+ # ########################################################################################
13+
14+ # General imputation methods
15+
par(mfrow = c(2, 2))

# Common y-axis range for every panel so the error plots are comparable.
diff_ylim <- c(-mean(AirPassengers), mean(AirPassengers))

# Each method is imputed exactly once and that single result feeds both the
# difference plot and the MSE. The MSE is averaged over all time points of
# the series (imputed minus the complete AirPassengers series, squared).

# Mean Imputation
imp_mean <- na_mean(tsAirgap, option = "mean")
plot(
  imp_mean - AirPassengers,
  ylim = diff_ylim, ylab = " Difference", main = " Mean"
)
mean((imp_mean - AirPassengers)^2)

# Median Imputation
imp_median <- na_mean(tsAirgap, option = "median")
plot(
  imp_median - AirPassengers,
  ylim = diff_ylim, ylab = " Difference", main = " Median"
)
mean((imp_median - AirPassengers)^2)

# Mode Imputation
imp_mode <- na_mean(tsAirgap, option = "mode")
plot(
  imp_mode - AirPassengers,
  ylim = diff_ylim, ylab = " Difference", main = " Mode"
)
mean((imp_mode - AirPassengers)^2)

# Random Imputation
# na_random() draws fresh values on every call, so impute once; otherwise the
# plotted series and the series the MSE is computed from would differ.
# Call set.seed() before this line if reproducible numbers are needed.
imp_random <- na_random(tsAirgap)
plot(
  imp_random - AirPassengers,
  ylim = diff_ylim, ylab = " Difference", main = " Random"
)
mean((imp_random - AirPassengers)^2)
32+
33+ # ########################################################################################
34+
35+ # TS specific imputation methods
36+
par(mfrow = c(2, 2))

# Common y-axis range for every panel so the error plots are comparable.
diff_ylim <- c(-mean(AirPassengers), mean(AirPassengers))

# Each method is imputed once; the same result is plotted (as the difference
# to the complete AirPassengers series) and scored with the MSE, which is
# averaged over all time points of the series.

# Last Observation Carried Forward
imp_locf <- na_locf(tsAirgap, option = "locf")
plot(
  imp_locf - AirPassengers,
  ylim = diff_ylim, ylab = " Difference", main = " LOCF"
)
m1 <- mean((imp_locf - AirPassengers)^2)

# Next Observation Carried Backward
imp_nocb <- na_locf(tsAirgap, option = "nocb")
plot(
  imp_nocb - AirPassengers,
  ylim = diff_ylim, ylab = " Difference", main = " NOCB"
)
m2 <- mean((imp_nocb - AirPassengers)^2)

# Linear Interpolation
imp_linear <- na_interpolation(tsAirgap, option = "linear")
plot(
  imp_linear - AirPassengers,
  ylim = diff_ylim, ylab = " Difference", main = " Linear"
)
m3 <- mean((imp_linear - AirPassengers)^2)

# Spline Interpolation
imp_spline <- na_interpolation(tsAirgap, option = "spline")
plot(
  imp_spline - AirPassengers,
  ylim = diff_ylim, ylab = " Difference", main = " Spline"
)
m4 <- mean((imp_spline - AirPassengers)^2)

# Side-by-side MSE comparison; the second row is NOCB (option = "nocb").
data.frame(
  methods = c(" LOCF", " NOCB", " Linear", " Spline"),
  MSE = c(m1, m2, m3, m4)
)
56+
57+ # ########################################################################################
58+
59+ # Combined imputation approach
60+
par(mfrow = c(2, 2))

# Common y-axis range for every panel so the error plots are comparable.
diff_ylim <- c(-mean(AirPassengers), mean(AirPassengers))

# Each na_seadec() variant is imputed once; the same result is plotted (as
# the difference to the complete AirPassengers series) and scored with the
# MSE, which is averaged over all time points of the series.

# Seasonal Adjustment then Random
# The random variant is not deterministic, so it must not be recomputed
# between the plot and the MSE. Call set.seed() before this line if
# reproducible numbers are needed.
imp_sea_random <- na_seadec(tsAirgap, algorithm = "random")
plot(
  imp_sea_random - AirPassengers,
  ylim = diff_ylim, ylab = " Difference", main = " Seas-Adj -> Random"
)
ma1 <- mean((imp_sea_random - AirPassengers)^2)

# Seasonal Adjustment then Mean
imp_sea_mean <- na_seadec(tsAirgap, algorithm = "mean")
plot(
  imp_sea_mean - AirPassengers,
  ylim = diff_ylim, ylab = " Difference", main = " Seas-Adj -> Mean"
)
ma2 <- mean((imp_sea_mean - AirPassengers)^2)

# Seasonal Adjustment then LOCF
imp_sea_locf <- na_seadec(tsAirgap, algorithm = "locf")
plot(
  imp_sea_locf - AirPassengers,
  ylim = diff_ylim, ylab = " Difference", main = " Seas-Adj -> LOCF"
)
ma3 <- mean((imp_sea_locf - AirPassengers)^2)

# Seasonal Adjustment then Linear Interpolation
imp_sea_linear <- na_seadec(tsAirgap, algorithm = "interpolation")
plot(
  imp_sea_linear - AirPassengers,
  ylim = diff_ylim, ylab = " Difference", main = " Seas-Adj -> Linear"
)
ma4 <- mean((imp_sea_linear - AirPassengers)^2)

# Side-by-side MSE comparison of the four seasonally adjusted variants.
data.frame(
  methods = c(
    " Seas-Adj+Random", " Seas-Adj+Mean", " Seas-Adj+LOCF", " Seas-Adj+Linear"
  ),
  MSE = c(ma1, ma2, ma3, ma4)
)
0 commit comments