Skip to content

Commit 94593a0

Browse files
committed
Adding imputation methods
1 parent cc44686 commit 94593a0

File tree

2 files changed

+82
-1
lines changed

2 files changed

+82
-1
lines changed

r/Dockerfile

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,4 +5,5 @@ FROM rocker/tidyverse:$TIDYVERSE_TAG
55
RUN install2.r --error \
66
--deps TRUE \
77
anomalize \
8-
coindeskr
8+
coindeskr \
9+
imputeTS

r/user/extras/ts_imputation.r

Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,80 @@
1+
# Source: https://www.kaggle.com/juejuewang/handle-missing-values-in-time-series-for-beginners
2+
# Author: Nishant Singh - https://www.datacamp.com/profile/NishantKumarSingh
3+
4+
library(imputeTS)
5+
6+
# View() opens a GUI data viewer, which needs an interactive session with a
# display; skip it when the script is run non-interactively (e.g. Rscript).
if (interactive()) {
  View(tsAirgap)
}
7+
8+
# Plot the series that still contains the missing observations
plot(
  tsAirgap,
  main = "AirPassenger data with missing values"
)
9+
10+
# Report imputeTS's missing-value statistics for tsAirgap
statsNA(tsAirgap)
11+
12+
#########################################################################################
13+
14+
# General imputation methods
15+
16+
# Lay out the next four plots in a 2 x 2 grid
par(mfrow = c(2, 2))
17+
# Mean Imputation
18+
# Imputation error: mean-imputed series minus the complete AirPassengers series
plot(
  na_mean(tsAirgap, option = "mean") - AirPassengers,
  ylim = c(-1, 1) * mean(AirPassengers),
  ylab = "Difference",
  main = "Mean"
)
19+
# Mean squared error of mean imputation
mean(
  (AirPassengers - na_mean(tsAirgap, option = "mean"))^2
)
20+
21+
# Median Imputation
22+
# Imputation error: median-imputed series minus the complete AirPassengers series
plot(
  na_mean(tsAirgap, option = "median") - AirPassengers,
  ylim = c(-1, 1) * mean(AirPassengers),
  ylab = "Difference",
  main = "Median"
)
23+
# Mean squared error of median imputation
mean(
  (AirPassengers - na_mean(tsAirgap, option = "median"))^2
)
24+
25+
# Mode Imputation
26+
# Imputation error: mode-imputed series minus the complete AirPassengers series
plot(
  na_mean(tsAirgap, option = "mode") - AirPassengers,
  ylim = c(-1, 1) * mean(AirPassengers),
  ylab = "Difference",
  main = "Mode"
)
27+
# Mean squared error of mode imputation
mean(
  (AirPassengers - na_mean(tsAirgap, option = "mode"))^2
)
28+
29+
# Random Imputation
30+
# Random imputation draws new values on every call; fix the seed so this
# figure is reproducible from run to run.
set.seed(42)
plot(
  na_random(tsAirgap) - AirPassengers,
  ylim = c(-1, 1) * mean(AirPassengers),
  ylab = "Difference",
  main = "Random"
)
31+
# Random imputation draws new values on every call; fix the seed so the
# reported mean squared error is reproducible from run to run.
set.seed(42)
mean((na_random(tsAirgap) - AirPassengers)^2)
32+
33+
#########################################################################################
34+
35+
# Time-series-specific imputation methods
36+
37+
# Lay out the next four plots in a 2 x 2 grid
par(mfrow = c(2, 2))
38+
# Last Observation Carried Forward
39+
# Imputation error: LOCF-imputed series minus the complete AirPassengers series
plot(
  na_locf(tsAirgap, option = "locf") - AirPassengers,
  ylim = c(-1, 1) * mean(AirPassengers),
  ylab = "Difference",
  main = "LOCF"
)
40+
# Mean squared error of LOCF imputation
m1 <- mean(
  (AirPassengers - na_locf(tsAirgap, option = "locf"))^2
)
41+
42+
# Next Observation Carried Backward
43+
# Imputation error: NOCB-imputed series minus the complete AirPassengers series
plot(
  na_locf(tsAirgap, option = "nocb") - AirPassengers,
  ylim = c(-1, 1) * mean(AirPassengers),
  ylab = "Difference",
  main = "NOCB"
)
44+
# Mean squared error of NOCB imputation
m2 <- mean(
  (AirPassengers - na_locf(tsAirgap, option = "nocb"))^2
)
45+
46+
# Linear Interpolation
47+
# Imputation error: linearly interpolated series minus the complete series
plot(
  na_interpolation(tsAirgap, option = "linear") - AirPassengers,
  ylim = c(-1, 1) * mean(AirPassengers),
  ylab = "Difference",
  main = "Linear"
)
48+
# Mean squared error of linear interpolation
m3 <- mean(
  (AirPassengers - na_interpolation(tsAirgap, option = "linear"))^2
)
49+
50+
# Spline Interpolation
51+
# Imputation error: spline-interpolated series minus the complete series
plot(
  na_interpolation(tsAirgap, option = "spline") - AirPassengers,
  ylim = c(-1, 1) * mean(AirPassengers),
  ylab = "Difference",
  main = "Spline"
)
52+
53+
# Mean squared error of spline interpolation
m4 <- mean(
  (AirPassengers - na_interpolation(tsAirgap, option = "spline"))^2
)
54+
55+
# Summary table: MSE of each time-series-specific imputation method.
# The second entry is Next Observation Carried Backward (NOCB), computed
# with na_locf(option = "nocb").
data.frame(
  methods = c("LOCF", "NOCB", "Linear", "Spline"),
  MSE = c(m1, m2, m3, m4)
)
56+
57+
#########################################################################################
58+
59+
# Combined imputation approach (seasonal adjustment followed by a second method)
60+
61+
# Lay out the next four plots in a 2 x 2 grid
par(mfrow = c(2, 2))
62+
# Seasonal Adjustment then Random
63+
# The "random" algorithm draws new values on every call; fix the seed so
# this figure is reproducible from run to run.
set.seed(42)
plot(
  na_seadec(tsAirgap, algorithm = "random") - AirPassengers,
  ylim = c(-1, 1) * mean(AirPassengers),
  ylab = "Difference",
  main = "Seas-Adj -> Random"
)
64+
# The "random" algorithm draws new values on every call; fix the seed so
# the stored mean squared error is reproducible from run to run.
set.seed(42)
ma1 <- mean((na_seadec(tsAirgap, algorithm = "random") - AirPassengers)^2)
65+
66+
# Seasonal Adjustment then Mean
67+
# Imputation error: seasonal adjustment + mean imputation vs. complete series
plot(
  na_seadec(tsAirgap, algorithm = "mean") - AirPassengers,
  ylim = c(-1, 1) * mean(AirPassengers),
  ylab = "Difference",
  main = "Seas-Adj -> Mean"
)
68+
# Mean squared error of seasonal adjustment + mean imputation
ma2 <- mean(
  (AirPassengers - na_seadec(tsAirgap, algorithm = "mean"))^2
)
69+
70+
# Seasonal Adjustment then LOCF
71+
# Imputation error: seasonal adjustment + LOCF vs. complete series
plot(
  na_seadec(tsAirgap, algorithm = "locf") - AirPassengers,
  ylim = c(-1, 1) * mean(AirPassengers),
  ylab = "Difference",
  main = "Seas-Adj -> LOCF"
)
72+
# Mean squared error of seasonal adjustment + LOCF
ma3 <- mean(
  (AirPassengers - na_seadec(tsAirgap, algorithm = "locf"))^2
)
73+
74+
# Seasonal Adjustment then Linear Interpolation
75+
# Imputation error: seasonal adjustment + interpolation vs. complete series
plot(
  na_seadec(tsAirgap, algorithm = "interpolation") - AirPassengers,
  ylim = c(-1, 1) * mean(AirPassengers),
  ylab = "Difference",
  main = "Seas-Adj -> Linear"
)
76+
77+
# Mean squared error of seasonal adjustment + interpolation
ma4 <- mean(
  (AirPassengers - na_seadec(tsAirgap, algorithm = "interpolation"))^2
)
78+
79+
# Summary table: MSE of each seasonal-adjustment-based imputation method
data.frame(
  methods = c(
    "Seas-Adj+Random", "Seas-Adj+Mean", "Seas-Adj+LOCF", "Seas-Adj+Linear"
  ),
  MSE = c(ma1, ma2, ma3, ma4)
)

0 commit comments

Comments
 (0)