Reputation: 223
I have the following data frame:
df <-structure(list(time = c("12:00:00", "12:05:00", "12:10:00", "12:15:00",
"12:20:00", "12:25:00", "12:30:00", "12:35:00", "12:40:00", "12:45:00",
"12:50:00", "12:55:00", "13:00:00", "13:05:00", "13:10:00", "13:15:00",
"13:20:00", "13:25:00"), speedA = c(60L, 75L, 65L, 45L, 12L,
15L, 20L, 45L, 65L, 60L, 60L, 30L, 35L, 45L, 25L, 15L, 10L, 5L
), speedB = c(50L, 30L, NA, 40L, NA, NA, 18L, NA, NA, NA, 15L,
10L, 25L, NA, NA, 12L, NA, NA), speedC = c(60L, 25L, NA, NA,
30L, 15L, 50L, 60L, NA, 35L, 34L, NA, 15L, 64L, 10L, 7L, 60L,
60L), speedD = c(NA, 10L, 60L, NA, 50L, 55L, 45L, 35L, NA, NA,
45L, 60L, 35L, 34L, 36L, 39L, 48L, 47L)), class = "data.frame", row.names = c(NA,
-18L))
I want to replace the NAs with values using interpolation between the horizontal values at the same row of each NA. The expected result:
df2<- structure(list(time = c("12:00:00", "12:05:00", "12:10:00", "12:15:00",
"12:20:00", "12:25:00", "12:30:00", "12:35:00", "12:40:00", "12:45:00",
"12:50:00", "12:55:00", "13:00:00", "13:05:00", "13:10:00", "13:15:00",
"13:20:00", "13:25:00"), speedA = c(60L, 75L, 65L, 45L, 12L,
15L, 20L, 45L, 65L, 60L, 60L, 30L, 35L, 45L, 25L, 15L, 10L, 5L
), speedB = c(50, 30, 63.33333, 40, 21, 15, 18, 52.5, 65, 47.5,
15, 10, 25, 54.5, 17.5, 12, 35, 32.5), speedC = c(60, 25, 61.66667,
40, 30, 15, 50, 60, 65, 35, 34, 35, 15, 64, 10, 7, 60, 60), speedD = c(60L,
10L, 60L, 40L, 50L, 55L, 45L, 35L, 65L, 35L, 45L, 60L, 35L, 34L,
36L, 39L, 48L, 47L)), class = "data.frame", row.names = c(NA,
-18L))
Upvotes: 2
Views: 128
Reputation: 35554
You can use zoo::na.approx()
row-wise with c_across()
.
library(dplyr)
library(tidyr)
library(zoo)
df %>%
rowwise() %>%
mutate(speed = list(na.locf(na.approx(c_across(-time), na.rm = FALSE))), .keep = "unused") %>%
unnest_wider(speed, names_sep = "")
# # A tibble: 18 x 5
# time speed1 speed2 speed3 speed4
# <chr> <dbl> <dbl> <dbl> <dbl>
# 1 12:00:00 60 50 60 60
# 2 12:05:00 75 30 25 10
# 3 12:10:00 65 63.3 61.7 60
# 4 12:15:00 45 40 40 40
# 5 12:20:00 12 21 30 50
# 6 12:25:00 15 15 15 55
# 7 12:30:00 20 18 50 45
# 8 12:35:00 45 52.5 60 35
# 9 12:40:00 65 65 65 65
# 10 12:45:00 60 47.5 35 35
# 11 12:50:00 60 15 34 45
# 12 12:55:00 30 10 35 60
# 13 13:00:00 35 25 15 35
# 14 13:05:00 45 54.5 64 34
# 15 13:10:00 25 17.5 10 36
# 16 13:15:00 15 12 7 39
# 17 13:20:00 10 35 60 48
# 18 13:25:00 5 32.5 60 47
Upvotes: 0
Reputation: 388962
We can use zoo::na.approx
to interpolate values. For values which we are not able to interpolate (NA
values at the last) we use tidyr::fill
to fill it.
library(dplyr)
library(tidyr)
df %>%
pivot_longer(cols = -time) %>%
group_by(time) %>%
mutate(value = zoo::na.approx(value, na.rm = FALSE)) %>%
fill(value) %>%
pivot_wider()
# time speedA speedB speedC speedD
# <chr> <dbl> <dbl> <dbl> <dbl>
# 1 12:00:00 60 50 60 60
# 2 12:05:00 75 30 25 10
# 3 12:10:00 65 63.333 61.667 60
# 4 12:15:00 45 40 40 40
# 5 12:20:00 12 21 30 50
# 6 12:25:00 15 15 15 55
# 7 12:30:00 20 18 50 45
# 8 12:35:00 45 52.5 60 35
# 9 12:40:00 65 65 65 65
#10 12:45:00 60 47.5 35 35
#11 12:50:00 60 15 34 45
#12 12:55:00 30 10 35 60
#13 13:00:00 35 25 15 35
#14 13:05:00 45 54.5 64 34
#15 13:10:00 25 17.5 10 36
#16 13:15:00 15 12 7 39
#17 13:20:00 10 35 60 48
#18 13:25:00 5 32.5 60 47
Upvotes: 1