Reputation: 45
I have a function that returns several variables codependent with each other. The output it is a data frame with 1 row and n cols. The number of columns in the output depends on one of the inputs of the function. I need to vetorize it and join to "main" dataframe, something like 'dplyr::mutate()' does.
I really try to make a reprex simples as possible:
#data
df <- data.frame("ob" = 1:30,
"ob_pattern" = sample(c("p1", "p2"), size = 30, replace = T),
"value" = runif(n = 30))
> head(df)
ob ob_pattern value
1 1 p1 0.5442453
2 2 p2 0.1274518
3 3 p2 0.4256460
4 4 p1 0.9319009
5 5 p2 0.9828048
6 6 p2 0.2309473
#patterns
df_pt <- data.frame("pattern" = c("p1", "p1", "p2", "p2", "p2"),
"name" = c("n1", "n2", "n1", "n2", "n3" ),
"perct" = c(0.4, 0.15, 0.3, 0.5, 0.18))
> df_pt
pattern name perct
1 p1 n1 0.40
2 p1 n2 0.15
3 p2 n1 0.30
4 p2 n2 0.50
5 p2 n3 0.18
This function creates classes and multiplies a value from the database by a predefined pattern from the pattern table
fun <- function(value, ob_pattern, df_pt){
#filter the pattern
sel_pt <- df_pt %>%
dplyr::filter(pattern == ob_pattern)
out <- data.frame()
for(i in 1:nrow(sel_pt)){
out[1,i] <- sel_pt[i,2]
out[2,i] <- sel_pt[i,3] / value
}
names(out) <- out[1,]
out <- out[-1,]
return(out)
}
This function works fine "manually":
fun(10, "p1", df_pt)
> fun(10, "p1", df_pt)
n1 n2
2 0.04 0.015
fun(10, "p2", df_pt)
> fun(10, "p2", df_pt)
n1 n2 n3
2 0.03 0.05 0.018
However, not goes well in a map iteration:
pmap(list(value = df$value, ob_pattern = df$ob_pattern, df_pt = df_pt), fun)
> pmap(list(value = df$value, ob_pattern = df$ob_pattern, df_pt = df_pt), fun)
Erro: Element 3 of `.l` must have length 1 or 30, not 3
Run `rlang::last_error()` to see where the error occurred.
df <- df %>%
mutate(pmap(list(value = value, ob_pattern = ob_pattern, df_pt = df_pt), fun))
> df <- df %>%
+ mutate(pmap(list(value = value, ob_pattern = ob_pattern, df_pt = df_pt), fun))
Erro: Problem with `mutate()` input `..1`.
i `..1 = pmap(...)`.
x Element 3 of `.l` must have length 1 or 30, not 3
Run `rlang::last_error()` to see where the error occurred.
What I expect:
# A tibble: 6 x 30
ob ob_pattern value n1 n2 n3
<dbl> <chr> <dbl> <dbl> <dbl> <dbl>
1 1 p1 0.544 1.36 3.63 NA
2 2 p2 0.127 0.425 0.255 0.708
3 3 p2 0.426 1.42 0.851 2.36
4 4 p1 0.932 2.33 6.21 NA
5 5 p2 0.983 3.28 1.97 5.46
6 6 p2 0.231 0.770 0.462 1.28
Upvotes: 3
Views: 87
Reputation:
As another approach, this is a strong candidate for a nested dataframe.
In this case, we can adjust your function to take the filtered dataframe from the start.
fun2 <- function(value, sel_pt){
#filter the pattern
out <- data.frame()
for(i in 1:nrow(sel_pt)){
out[1,i] <- sel_pt[i,1]
out[2,i] <- sel_pt[i,2] / value
}
names(out) <- out[1,]
out <- out[-1,]
return(out)
}
Now we can nest-join on df_pt
and map over that as an input.
library(dplyr)
library(tidyr)
library(purrr)
df %>%
nest_join(df_pt, by = c(ob_pattern = "pattern")) %>%
mutate(output = map2(value, df_pt, fun2)) %>%
select(ob, ob_pattern, value, output) %>%
unnest_wider(output)
On another note, this fun2()
can easily be rewritten as follows. This returns the columns as numeric, which is probably what you want.
library(tibble)
fun3 <- function(value, sel_pt){
sel_pt %>%
mutate(perct = perct / value) %>%
deframe()
}
df %>%
nest_join(df_pt, by = c(ob_pattern = "pattern")) %>%
mutate(output = map2(value, df_pt, fun3)) %>%
select(ob, ob_pattern, value, output) %>%
unnest_wider(output)
# A tibble: 30 x 6
ob ob_pattern value n1 n2 n3
<int> <chr> <dbl> <dbl> <dbl> <dbl>
1 1 p1 0.898 0.445 0.167 NA
2 2 p1 0.413 0.970 0.364 NA
3 3 p2 0.507 0.592 0.987 0.355
4 4 p2 0.544 0.551 0.918 0.331
5 5 p2 0.504 0.595 0.992 0.357
6 6 p1 0.00277 145. 54.2 NA
7 7 p1 0.453 0.883 0.331 NA
8 8 p1 0.175 2.29 0.858 NA
9 9 p1 0.595 0.673 0.252 NA
10 10 p2 0.0358 8.37 13.9 5.02
# ... with 20 more rows
Upvotes: 1
Reputation: 887911
The issue is that df_pt
is a data.frame
and it needs to be used as input in each of the looped element. So, wrap it with list
so that it gets recycled as a single unit. When we loop over the data.frame
, the column is a unit and this triggers the error Erro: Element 3 of
.l must have length 1 or 30, not 3
as the number of columns is 3.
library(dplyr)
library(purrr)
pmap_dfr(list(value = df$value, ob_pattern = df$ob_pattern,
df_pt = list(df_pt)), fun, .id = 'ob') %>%
mutate(ob_pattern = df$ob_pattern, .before = 2)
-output
ob ob_pattern n1 n2 n3
1 1 p2 0.412805820786703 0.688009701311172 0.247683492472022
2 2 p2 0.819499036723223 1.36583172787204 0.491699422033934
3 3 p2 0.307851399008221 0.513085665013701 0.184710839404932
4 4 p1 0.512735060593463 0.192275647722549 <NA>
5 5 p1 0.583734910383962 0.218900591393986 <NA>
6 6 p1 1.26403823904009 0.474014339640033 <NA>
7 7 p1 0.520375965374508 0.195140987015441 <NA>
8 8 p2 0.519695574800472 0.86615929133412 0.311817344880283
9 9 p1 0.406595728747128 0.152473398280173 <NA>
10 10 p1 1.19690591834918 0.448839719380944 <NA>
11 11 p1 0.935134681128101 0.350675505423038 <NA>
12 12 p2 0.782381874921124 1.30396979153521 0.469429124952674
13 13 p1 0.902566162028802 0.338462310760801 <NA>
14 14 p2 0.412253449371353 0.687089082285588 0.247352069622812
15 15 p2 0.414083431765533 0.690139052942556 0.24845005905932
16 16 p2 0.540922520169042 0.901537533615069 0.324553512101425
17 17 p2 0.306604097963516 0.511006829939193 0.18396245877811
18 18 p2 1.94204963387021 3.23674938978369 1.16522978032213
19 19 p2 0.302096661043879 0.503494435073132 0.181257996626328
20 20 p1 0.478354496206454 0.17938293607742 <NA>
21 21 p2 0.406759159422302 0.677931932370503 0.244055495653381
22 22 p1 0.929982462421745 0.348743423408154 <NA>
23 23 p2 0.850658644553245 1.41776440758874 0.510395186731947
24 24 p1 1.24950965620306 0.468566121076146 <NA>
25 25 p1 0.807136438261923 0.302676164348221 <NA>
26 26 p2 75.9337007291282 126.55616788188 45.5602204374769
27 27 p2 0.487844654295068 0.813074423825113 0.292706792577041
28 28 p1 0.702944374408066 0.263604140403025 <NA>
29 29 p1 0.417447530041509 0.156542823765566 <NA>
30 30 p2 2.14866591202588 3.58110985337647 1.28919954721553
Or if we want to use pmap
within mutate
library(tidyr)
df %>%
mutate(out = pmap(across(c(value, ob_pattern)),
~ fun(..1, ..2, df_pt))) %>%
unnest_wider(c(out)) %>%
type.convert(as.is = TRUE)
-output
# A tibble: 30 × 6
ob ob_pattern value n1 n2 n3
<int> <chr> <dbl> <dbl> <dbl> <dbl>
1 1 p2 0.727 0.413 0.688 0.248
2 2 p2 0.366 0.819 1.37 0.492
3 3 p2 0.974 0.308 0.513 0.185
4 4 p1 0.780 0.513 0.192 NA
5 5 p1 0.685 0.584 0.219 NA
6 6 p1 0.316 1.26 0.474 NA
7 7 p1 0.769 0.520 0.195 NA
8 8 p2 0.577 0.520 0.866 0.312
9 9 p1 0.984 0.407 0.152 NA
10 10 p1 0.334 1.20 0.449 NA
# … with 20 more rows
NOTE: The output generated returns character
columns and that is only because of some issue in the OP's fun
code
Or using rowwise
df %>%
rowwise %>%
mutate(out = fun(value, ob_pattern, df_pt)) %>%
ungroup %>%
unpack(out) %>%
type.convert(as.is = TRUE)
-output
# A tibble: 30 × 6
ob ob_pattern value n1 n2 n3
<int> <chr> <dbl> <dbl> <dbl> <dbl>
1 1 p2 0.727 0.413 0.688 0.248
2 2 p2 0.366 0.819 1.37 0.492
3 3 p2 0.974 0.308 0.513 0.185
4 4 p1 0.780 0.513 0.192 NA
5 5 p1 0.685 0.584 0.219 NA
6 6 p1 0.316 1.26 0.474 NA
7 7 p1 0.769 0.520 0.195 NA
8 8 p2 0.577 0.520 0.866 0.312
9 9 p1 0.984 0.407 0.152 NA
10 10 p1 0.334 1.20 0.449 NA
# … with 20 more rows
Upvotes: 0