Reputation: 297
I have a dataframe with multiple NULL
values. The class type of the columns are LIST
not NUMERIC
. Is it possible to replace all the NULL
values with the median value of the column? I tried a manual way was to change the NULL
value of the column 1 by 1, using the as.numeric()
function and subsequently apply the median()
function. Is there a more efficient way to do this?
i1 <- sapply(pivot_table_1$`Start Working`, is.null)
pivot_table_1$`Start Working`[i1] <- 0
Output from dput()
:
structure(list(Day = 1:31, `Start Sleeping` = list(0, 20, 35,
40, 50, 0, 40, 0, 0, 40, 50, 0, 0, 40, 0, 40, 35, 45, 0,
0, 65, 35, 40, 40, 0, 50, 40, 0, 0, 0, 0), `Stop Sleeping` = list(
440, 440, 440, 440, 440, 440, 440, 440, 440, 440, 440, 440,
440, 440, 440, 440, 440, 440, 440, 440, 440, 440, 440, 440,
440, 440, 440, 440, 440, 440, 440), `Start Working` = list(
490, 490, 490, 490, 0, 0, 490, 490, 490, 490, 490, 0, 0,
490, 490, 490, 490, 490, 0, 0, 490, 490, 490, 490, 490, 0,
0, 490, 490, 490, 490), `Stop Working` = list(1005, 1005,
1005, 1005, NULL, NULL, 965, 965, 965, 965, 965, NULL, NULL,
965, 965, 965, 965, 965, NULL, NULL, 965, 965, 965, 965,
965, NULL, NULL, 965, 965, 965, 965), Breakfast = list(690,
645, 615, 540, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
475, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 475, NULL,
NULL, NULL, NULL, NULL, 475, NULL, NULL, NULL, NULL, NULL),
Dinner = list(1390, 1360, 1285, 1270, 1390, NULL, 1140, 1140,
1130, 1135, 1130, NULL, 1165, 1140, 1130, 1135, 1130,
1140, 1140, 1180, NULL, 1145, 1135, 1140, 1135, 1160,
1140, 1140, NULL, 1140, NULL)), row.names = c(NA, -31L
), class = c("tbl_df", "tbl", "data.frame"))
Upvotes: 4
Views: 356
Reputation: 25323
Another possible solution:
library(tidyverse)
df %>%
mutate(across(-Day, ~ ifelse(lengths(.x) == 0, median(unlist(.x)), .x)))
#> # A tibble: 31 × 7
#> Day `Start Sleeping` `Stop Sleeping` `Start Working` `Stop Working`
#> <int> <list> <list> <list> <list>
#> 1 1 <dbl [1]> <dbl [1]> <dbl [1]> <dbl [1]>
#> 2 2 <dbl [1]> <dbl [1]> <dbl [1]> <dbl [1]>
#> 3 3 <dbl [1]> <dbl [1]> <dbl [1]> <dbl [1]>
#> 4 4 <dbl [1]> <dbl [1]> <dbl [1]> <dbl [1]>
#> 5 5 <dbl [1]> <dbl [1]> <dbl [1]> <dbl [1]>
#> 6 6 <dbl [1]> <dbl [1]> <dbl [1]> <dbl [1]>
#> 7 7 <dbl [1]> <dbl [1]> <dbl [1]> <dbl [1]>
#> 8 8 <dbl [1]> <dbl [1]> <dbl [1]> <dbl [1]>
#> 9 9 <dbl [1]> <dbl [1]> <dbl [1]> <dbl [1]>
#> 10 10 <dbl [1]> <dbl [1]> <dbl [1]> <dbl [1]>
#> # … with 21 more rows, and 2 more variables: Breakfast <list>, Dinner <list>
Unnesting:
library(tidyverse)
df %>%
mutate(across(-Day, ~ ifelse(lengths(.x) == 0, median(unlist(.x)), .x))) %>%
unnest(everything())
#> # A tibble: 31 × 7
#> Day `Start Sleeping` `Stop Sleeping` `Start Working` `Stop Working`
#> <int> <dbl> <dbl> <dbl> <dbl>
#> 1 1 0 440 490 1005
#> 2 2 20 440 490 1005
#> 3 3 35 440 490 1005
#> 4 4 40 440 490 1005
#> 5 5 50 440 0 965
#> 6 6 0 440 0 965
#> 7 7 40 440 490 965
#> 8 8 0 440 490 965
#> 9 9 0 440 490 965
#> 10 10 40 440 490 965
#> # … with 21 more rows, and 2 more variables: Breakfast <dbl>, Dinner <dbl>
Upvotes: 0
Reputation: 35554
replace_na()
from tidyr
can be used to replace NULL
s in a list. (NULL
s are the list-column equivalent of NA
s)
library(tidyverse)
replace_na(df, map(keep(df, is.list), ~ list(median(unlist(.x)))))
# # A tibble: 31 × 7
# Day `Start Sleeping` `Stop Sleeping` `Start Working` `Stop Working` Breakfast Dinner
# <int> <list> <list> <list> <list> <list> <list>
# 1 1 <dbl [1]> <dbl [1]> <dbl [1]> <dbl [1]> <dbl [1]> <dbl [1]>
# 2 2 <dbl [1]> <dbl [1]> <dbl [1]> <dbl [1]> <dbl [1]> <dbl [1]>
# 3 3 <dbl [1]> <dbl [1]> <dbl [1]> <dbl [1]> <dbl [1]> <dbl [1]>
# 4 4 <dbl [1]> <dbl [1]> <dbl [1]> <dbl [1]> <dbl [1]> <dbl [1]>
# 5 5 <dbl [1]> <dbl [1]> <dbl [1]> <dbl [1]> <dbl [1]> <dbl [1]>
# 6 6 <dbl [1]> <dbl [1]> <dbl [1]> <dbl [1]> <dbl [1]> <dbl [1]>
# 7 7 <dbl [1]> <dbl [1]> <dbl [1]> <dbl [1]> <dbl [1]> <dbl [1]>
# 8 8 <dbl [1]> <dbl [1]> <dbl [1]> <dbl [1]> <dbl [1]> <dbl [1]>
# 9 9 <dbl [1]> <dbl [1]> <dbl [1]> <dbl [1]> <dbl [1]> <dbl [1]>
# 10 10 <dbl [1]> <dbl [1]> <dbl [1]> <dbl [1]> <dbl [1]> <dbl [1]>
# # … with 21 more rows
If you want these list-columns to be flattened, try unnest()
:
replace_na(df, map(keep(df, is.list), ~ list(median(unlist(.x))))) %>%
unnest(where(is.list))
# # A tibble: 31 × 7
# Day `Start Sleeping` `Stop Sleeping` `Start Working` `Stop Working` Breakfast Dinner
# <int> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
# 1 1 0 440 490 1005 690 1390
# 2 2 20 440 490 1005 645 1360
# 3 3 35 440 490 1005 615 1285
# 4 4 40 440 490 1005 540 1270
# 5 5 50 440 0 965 540 1390
# 6 6 0 440 0 965 540 1140
# 7 7 40 440 490 965 540 1140
# 8 8 0 440 490 965 540 1140
# 9 9 0 440 490 965 540 1130
# 10 10 40 440 490 965 540 1135
Upvotes: 1
Reputation: 173793
If you wish to keep the entries as length-one lists you can do:
pivot_table_1[] <- lapply(pivot_table_1, function(x) {
ifelse(lengths(x) == 1, x, list(median(unlist(x))))})
pivot_table_1
#> # A tibble: 31 x 7
#> Day `Start Sleeping` `Stop Sleeping` `Start Working` `Stop Working`
#> <int> <list> <list> <list> <list>
#> 1 1 <dbl [1]> <dbl [1]> <dbl [1]> <dbl [1]>
#> 2 2 <dbl [1]> <dbl [1]> <dbl [1]> <dbl [1]>
#> 3 3 <dbl [1]> <dbl [1]> <dbl [1]> <dbl [1]>
#> 4 4 <dbl [1]> <dbl [1]> <dbl [1]> <dbl [1]>
#> 5 5 <dbl [1]> <dbl [1]> <dbl [1]> <dbl [1]>
#> 6 6 <dbl [1]> <dbl [1]> <dbl [1]> <dbl [1]>
#> 7 7 <dbl [1]> <dbl [1]> <dbl [1]> <dbl [1]>
#> 8 8 <dbl [1]> <dbl [1]> <dbl [1]> <dbl [1]>
#> 9 9 <dbl [1]> <dbl [1]> <dbl [1]> <dbl [1]>
#> 10 10 <dbl [1]> <dbl [1]> <dbl [1]> <dbl [1]>
#> # ... with 21 more rows, and 2 more variables: Breakfast <list>, Dinner <list>
Or, if you want them as numeric columns, do:
pivot_table_1[] <- lapply(pivot_table_1, function(x) {
unlist(ifelse(lengths(x) == 1, x, list(median(unlist(x)))))})
pivot_table_1
#> # A tibble: 31 x 7
#> Day `Start Sleeping` `Stop Sleeping` `Start Working` `Stop Working`
#> <int> <dbl> <dbl> <dbl> <dbl>
#> 1 1 0 440 490 1005
#> 2 2 20 440 490 1005
#> 3 3 35 440 490 1005
#> 4 4 40 440 490 1005
#> 5 5 50 440 0 965
#> 6 6 0 440 0 965
#> 7 7 40 440 490 965
#> 8 8 0 440 490 965
#> 9 9 0 440 490 965
#> 10 10 40 440 490 965
#> # ... with 21 more rows, and 2 more variables: Breakfast <dbl>, Dinner <dbl>
Created on 2022-05-22 by the reprex package (v2.0.1)
Upvotes: 2