Reputation: 328
I have a data frame like below, the NAs are snatchy, and I want to find and replace the NAs according to their numbers. I mean, once the number of NAs in one column is equal to/more than 5, then replace these values by another column values in the same rows.
df<- structure(list(Tsoil_33 = c(NA, NA, NA, NA, NA, 5.21,
5.21, 5.21, 5.21, 5.23, NA, NA, NA, 5.23, 5.23, 5.23, 5.23,
5.23, 5.26, 5.26, 5.26, 5.26, 5.26, 5.26, 5.26, 5.26, 5.26, 5.26,
5.26, 5.26, 5.26, 5.26, 5.28, 5.28, 5.28, -0.14, -0.14, -0.14,
-0.17, -0.14, -0.14), Tsoil_34 = c(5.18, 5.18, 5.18, 5.18, 5.18,
5.18, 5.18, 5.18, 5.18, 5.21, 5.21, 5.21, 5.21, 5.21, 5.21, 5.21,
5.21, 5.21, 5.21, 5.21, 5.21, 5.21, 5.21, 5.23, 5.23, 5.23, 5.23,
5.23, 5.23, 5.23, 5.26, 5.23, 5.26, 5.26, 5.26, -0.26, -0.26,
-0.26, -0.26, -0.26, -0.26), SWC_11 = c(NA, NA, NA,
NA, NA, 0.2107, 0.2107, 0.2107, 0.2103, 0.2103, 0.2103,
0.2107, 0.2111, NA, NA, NA, NA, NA, 0.2103,
0.2107, 0.2111, 0.2103, 0.2107, 0.2107, 0.2103, 0.2107, 0.2107,
0.2103, 0.2103, 0.2111, 0.2107, 0.2103, 0.2103, 0.2099, 0.2107,
0.0774, 0.0783, 0.0783, 0.0783, 0.0783, 0.0783), SWC_12 = c(0.2845,
0.2849, 0.2849, 0.2845, 0.2845, 0.2845, 0.2842, 0.2853, 0.2845,
0.2838, 0.2838, 0.2853, 0.2845, 0.2845, 0.2853, 0.2845, 0.2853,
0.2842, 0.2849, 0.2838, 0.2845, 0.2845, 0.2845, 0.2842, 0.2845,
0.2838, 0.2849, 0.2849, 0.2849, 0.2842, 0.2849, 0.2853, 0.2842,
0.2845, 0.2838, 0.0264, 0.0256, 0.0273, 0.0273, 0.0264, 0.0273
), SWC_13 = c(0.2908, 0.29, 0.2908, 0.29, 0.29, 0.2896, 0.2896,
0.29, 0.29, 0.29, 0.2908, 0.29, 0.2904, 0.2896, 0.2904, 0.2916,
0.29, 0.2896, 0.2904, 0.2896, 0.2896, 0.29, 0.29, 0.2892, 0.2896,
0.2896, 0.2896, 0.2892, 0.2892, 0.2896, 0.2896, 0.2892, 0.2896,
0.2892, 0.2896, 0.0733, 0.0747, 0.0733, 0.0733, 0.074, 0.0747
), SWC_14 = c(0.2216, 0.2212, 0.222, 0.2212, 0.2216, 0.222, 0.2212,
0.2216, 0.2216, 0.2223, 0.2216, 0.2216, 0.2212, 0.2216, 0.2216,
0.2216, 0.2208, 0.2208, 0.2208, 0.2212, 0.2208, 0.2208, 0.2208,
0.2208, 0.2208, 0.2204, 0.2208, 0.2208, 0.2208, 0.2208, 0.2208,
0.2208, 0.2212, 0.2208, 0.2204, 0.0579, 0.057, 0.0579, 0.0579,
0.0579, 0.0579), SWC_21 = c(0.2604, 0.2593, 0.2597, 0.2597, 0.2601,
0.2601, 0.2597, 0.2597, 0.2601, 0.2593, 0.2597, 0.2597, 0.2597,
0.2601, 0.2597, 0.2601, 0.2589, 0.2593, 0.2593, 0.2593, 0.2593,
0.2586, 0.2589, 0.2589, 0.2589, 0.2589, 0.2586, 0.2589, 0.2589,
0.2586, 0.2589, 0.2589, 0.2589, 0.2597, 0.2593, 0.1969, 0.1973,
0.1973, 0.1973, 0.1973, 0.1973), SWC_22 = c(0.2378, 0.2378, 0.2419,
0.2378, 0.2378, 0.2374, 0.2374, 0.2378, 0.2374, 0.2378, 0.2374,
0.2374, 0.2378, 0.2378, 0.2382, 0.2374, 0.2374, 0.2378, 0.2378,
0.2378, 0.2374, 0.2374, 0.2382, 0.2382, 0.2378, 0.2389, 0.2378,
0.2374, 0.2423, 0.2374, 0.2378, 0.2371, 0.2374, 0.2374, 0.2371,
0.1089, 0.1089, 0.1097, 0.1097, 0.1089, 0.1089), SWC_23 = c(0.2246,
0.2246, 0.2246, 0.2243, 0.225, 0.2246, 0.225, 0.225, 0.2239,
0.2243, 0.2246, 0.2246, 0.2243, 0.2246, 0.2243, 0.2239, 0.2246,
0.2243, 0.2239, 0.2246, 0.2243, 0.2258, 0.2243, 0.2246, 0.2246,
0.2246, 0.2254, 0.2246, 0.2243, 0.2246, 0.2239, 0.2239, 0.2246,
0.2243, 0.225, 0.1868, 0.1872, 0.1872, 0.1868, 0.1868, 0.1872
), SWC_24 = c(0.2326, 0.233, 0.233, 0.233, 0.233, 0.2326, 0.2341,
0.2333, 0.233, 0.233, 0.233, 0.2326, 0.2326, 0.2326, 0.2322,
0.233, 0.233, 0.2326, 0.2326, 0.2322, 0.233, 0.2322, 0.2322,
0.2318, 0.2318, 0.2318, 0.2322, 0.2326, 0.2322, 0.2326, 0.2326,
0.2314, 0.2322, 0.2318, 0.2314, 0.1763, 0.1763, 0.1772, 0.1776,
0.1776, 0.1767), SWC_31 = c(0.1572, 0.1564, 0.1572, 0.1564, 0.1564,
0.1564, 0.1564, 0.1564, 0.1564, 0.1564, 0.1564, 0.1564, 0.1564,
0.1564, 0.1564, 0.1564, 0.1564, 0.1564, 0.1564, 0.1564, 0.1564,
0.1564, 0.1564, 0.1564, 0.1564, 0.1564, 0.1564, 0.1564, 0.1557,
0.1557, 0.1557, 0.1557, 0.1557, 0.1557, 0.1557, 0.0511, 0.0511,
0.0511, 0.0519, 0.0519, 0.0519), SWC_63 = c(0.2601, NA, 0.2597,
0.2601, 0.2597, 0.2593, 0.2593, 0.2601, 0.2601, 0.2597, 0.2597,
0.2597, 0.2601, 0.2604, 0.2597, 0.2589, 0.2593, 0.2597, 0.2597,
0.2597, 0.2601, 0.2601, 0.2604, 0.2597, 0.2597, 0.2597, 0.2604,
0.2604, 0.2604, 0.2604, 0.2604, NA, NA, 0.2597, 0.2597,
0.2177, 0.2185, 0.2177, 0.2177, 0.2185, 0.2181), SWC_64 = c(0.2303,
0.2307, 0.2307, 0.2318, 0.2307, 0.2303, 0.2307, 0.2303, 0.2303,
0.2299, 0.2303, 0.2307, 0.2303, 0.2307, 0.2307, 0.2307, 0.2307,
0.2307, 0.2311, 0.2311, 0.2318, 0.2322, 0.2322, 0.2318, 0.2318,
0.2314, 0.2314, 0.2314, 0.2318, 0.2318, 0.2318, 0.2318, 0.2314,
0.2314, 0.2318, 0.246, 0.246, 0.246, 0.246, 0.246, 0.246), Tsoil_11_flag = c(2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2), Tsoil_12_flag = c(2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2), Tsoil_13_flag = c(2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2)), row.names = c(1L,
2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L,
16L, 17L, 18L, 19L, 20L, 21L, 22L, 23L, 24L, 25L, 26L, 27L, 28L,
29L, 30L, 31L, 32L, 33L, 34L, 35L, 223770L, 223771L, 223772L,
223773L, 223774L, 223775L), class = "data.frame")
Like the above sample data, repace the first 5 NAs in Tsoil_33 by using the first 5 values in Tsoil_34. And repace the first 5 NAs in SWC_11 by using the first 5 values in SWC_12. There are also some missing values that are less than 5, so using the closest surrounding mean values (of the same column) to fill them.
It's kind of complicated. I need some clues for this.
Upvotes: 0
Views: 36
Reputation: 876
My codes can solve the first part of your problem: replace the values if there are 5 or more consecutive NAs.
# use a small dataset just to develop the codes
df <- df[,c(1,2,3,4)]
# create an empty column as "Flag" for "Tsoil_33"
df$Tsoil_33_flag <- NA
# for the first row, the "Tsoil_33" is "NA", here we assign the flag of "1"
df$Tsoil_33_flag[[1]] <- 1
# then from the 2nd row, calculate the "Flag" number depending on it is NA or not
for (i in 2:length(df$Tsoil_33_flag)){
if (is.na(df$Tsoil_33[[i]]) == TRUE){
df$Tsoil_33_flag[[i]] <- 1 + df$Tsoil_33_flag[[i-1]]
}
else {
df$Tsoil_33_flag[[i]] <- 0
}
}
# when at least 5 consecutive NAs appear, replace them using data from the target column
for (i in 1:length(df$Tsoil_33_flag)){
if (df$Tsoil_33_flag[[i]] >= 5){
number <- df$Tsoil_33_flag[[i]]
df$Tsoil_33[(i-number):i] <- df$Tsoil_34[(i-number):i]
}
}
For the second part of your problem, I think you can submit a separate post.
Upvotes: 1