pdubois
pdubois

Reputation: 7800

How to add a column to a tibble/df using rnorm with parameters taken from the values of other columns

I have the following data frame:

library(tidyverse)
# Example data: 30 rows, 10 each for var = "X", "Y" and "Z".
# Within each var group the mod_* columns repeat a single value; only `vec`
# varies from row to row.
df <- structure(list(var = c("X", "X", "X", "X", "X", "X", "X", "X", 
"X", "X", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Z", 
"Z", "Z", "Z", "Z", "Z", "Z", "Z", "Z", "Z"), mod_est_mean = c(0.649790620181318, 
0.649790620181318, 0.649790620181318, 0.649790620181318, 0.649790620181318, 
0.649790620181318, 0.649790620181318, 0.649790620181318, 0.649790620181318, 
0.649790620181318, 1.65651567796795, 1.65651567796795, 1.65651567796795, 
1.65651567796795, 1.65651567796795, 1.65651567796795, 1.65651567796795, 
1.65651567796795, 1.65651567796795, 1.65651567796795, 2.78544973796179, 
2.78544973796179, 2.78544973796179, 2.78544973796179, 2.78544973796179, 
2.78544973796179, 2.78544973796179, 2.78544973796179, 2.78544973796179, 
2.78544973796179), mod_est_sd = c(0.37898907459421, 0.37898907459421, 
0.37898907459421, 0.37898907459421, 0.37898907459421, 0.37898907459421, 
0.37898907459421, 0.37898907459421, 0.37898907459421, 0.37898907459421, 
1.27340261798159, 1.27340261798159, 1.27340261798159, 1.27340261798159, 
1.27340261798159, 1.27340261798159, 1.27340261798159, 1.27340261798159, 
1.27340261798159, 1.27340261798159, 2.38265470031565, 2.38265470031565, 
2.38265470031565, 2.38265470031565, 2.38265470031565, 2.38265470031565, 
2.38265470031565, 2.38265470031565, 2.38265470031565, 2.38265470031565
), mod_std_mean = c(0.119846868403716, 0.119846868403716, 0.119846868403716, 
0.119846868403716, 0.119846868403716, 0.119846868403716, 0.119846868403716, 
0.119846868403716, 0.119846868403716, 0.119846868403716, 0.40268526512431, 
0.40268526512431, 0.40268526512431, 0.40268526512431, 0.40268526512431, 
0.40268526512431, 0.40268526512431, 0.40268526512431, 0.40268526512431, 
0.40268526512431, 0.753461573070337, 0.753461573070337, 0.753461573070337, 
0.753461573070337, 0.753461573070337, 0.753461573070337, 0.753461573070337, 
0.753461573070337, 0.753461573070337, 0.753461573070337), mod_std_error = c(0.0847445333522391, 
0.0847445333522391, 0.0847445333522391, 0.0847445333522391, 0.0847445333522391, 
0.0847445333522391, 0.0847445333522391, 0.0847445333522391, 0.0847445333522391, 
0.0847445333522391, 0.284741481653302, 0.284741481653302, 0.284741481653302, 
0.284741481653302, 0.284741481653302, 0.284741481653302, 0.284741481653302, 
0.284741481653302, 0.284741481653302, 0.284741481653302, 0.532777787681519, 
0.532777787681519, 0.532777787681519, 0.532777787681519, 0.532777787681519, 
0.532777787681519, 0.532777787681519, 0.532777787681519, 0.532777787681519, 
0.532777787681519), vec = c(0.626453810742332, 0.183643324222082, 
0.835628612410047, 1.59528080213779, 0.329507771815361, 0.820468384118015, 
0.487429052428485, 0.738324705129217, 0.575781351653492, 0.305388387156356, 
3.0235623369017, 0.779686472822862, 1.24248116108361, 4.429399774355, 
2.24986183628622, 0.0898672180304617, 0.0323805261978922, 1.8876724213706, 
1.64244239019618, 1.18780264243502, 3.67590948643287, 3.12854520292427, 
0.298259933460762, 7.95740678345349, 2.47930299157884, 0.224514958116003, 
0.623182026821317, 5.8830095355971, 1.91260022043448, 1.67176624079881
), dist_name = c("normal", "normal", "normal", "normal", "normal", 
"normal", "normal", "normal", "normal", "normal", "normal", "normal", 
"normal", "normal", "normal", "normal", "normal", "normal", "normal", 
"normal", "normal", "normal", "normal", "normal", "normal", "normal", 
"normal", "normal", "normal", "normal")), .Names = c("var", "mod_est_mean", 
"mod_est_sd", "mod_std_mean", "mod_std_error", "vec", "dist_name"
), class = c("tbl_df", "tbl", "data.frame"), row.names = c(NA, 
-30L))

It looks like this:

# Print up to 100 rows; the pipe already supplies df as the first argument.
df %>% print(n = 100)
# A tibble: 30 × 7
     var mod_est_mean mod_est_sd mod_std_mean mod_std_error        vec dist_name
   <chr>        <dbl>      <dbl>        <dbl>         <dbl>      <dbl>     <chr>
1      X    0.6497906  0.3789891    0.1198469    0.08474453 0.62645381    normal
2      X    0.6497906  0.3789891    0.1198469    0.08474453 0.18364332    normal
3      X    0.6497906  0.3789891    0.1198469    0.08474453 0.83562861    normal
4      X    0.6497906  0.3789891    0.1198469    0.08474453 1.59528080    normal
5      X    0.6497906  0.3789891    0.1198469    0.08474453 0.32950777    normal
6      X    0.6497906  0.3789891    0.1198469    0.08474453 0.82046838    normal
7      X    0.6497906  0.3789891    0.1198469    0.08474453 0.48742905    normal
8      X    0.6497906  0.3789891    0.1198469    0.08474453 0.73832471    normal
9      X    0.6497906  0.3789891    0.1198469    0.08474453 0.57578135    normal
10     X    0.6497906  0.3789891    0.1198469    0.08474453 0.30538839    normal
11     Y    1.6565157  1.2734026    0.4026853    0.28474148 3.02356234    normal
12     Y    1.6565157  1.2734026    0.4026853    0.28474148 0.77968647    normal
13     Y    1.6565157  1.2734026    0.4026853    0.28474148 1.24248116    normal
14     Y    1.6565157  1.2734026    0.4026853    0.28474148 4.42939977    normal
15     Y    1.6565157  1.2734026    0.4026853    0.28474148 2.24986184    normal
16     Y    1.6565157  1.2734026    0.4026853    0.28474148 0.08986722    normal
17     Y    1.6565157  1.2734026    0.4026853    0.28474148 0.03238053    normal
18     Y    1.6565157  1.2734026    0.4026853    0.28474148 1.88767242    normal
19     Y    1.6565157  1.2734026    0.4026853    0.28474148 1.64244239    normal
20     Y    1.6565157  1.2734026    0.4026853    0.28474148 1.18780264    normal
21     Z    2.7854497  2.3826547    0.7534616    0.53277779 3.67590949    normal
22     Z    2.7854497  2.3826547    0.7534616    0.53277779 3.12854520    normal
23     Z    2.7854497  2.3826547    0.7534616    0.53277779 0.29825993    normal
24     Z    2.7854497  2.3826547    0.7534616    0.53277779 7.95740678    normal
25     Z    2.7854497  2.3826547    0.7534616    0.53277779 2.47930299    normal
26     Z    2.7854497  2.3826547    0.7534616    0.53277779 0.22451496    normal
27     Z    2.7854497  2.3826547    0.7534616    0.53277779 0.62318203    normal
28     Z    2.7854497  2.3826547    0.7534616    0.53277779 5.88300954    normal
29     Z    2.7854497  2.3826547    0.7534616    0.53277779 1.91260022    normal
30     Z    2.7854497  2.3826547    0.7534616    0.53277779 1.67176624    normal

What I want to do is add a new column, new_vec, drawn with rnorm using mod_est_mean and mod_est_sd as the parameters, grouped by var.

For example, for X we get this rnorm:

> # Seed the RNG so the draw below is reproducible.
> set.seed(1)
> # One draw per row of group X, using that group's mean and sd.
> rnorm(nrow(filter(df, var == "X")), mean = 0.6497906, sd = 0.3789891)
 [1] 0.4123714 0.7193894 0.3330965 1.2543846 0.7746705 0.3388420 0.8345209 0.9296076 0.8680055 0.5340517

So in the end we have a table that looks like this:

     var mod_est_mean mod_est_sd mod_std_mean mod_std_error        vec dist_name new_vec
1      X    0.6497906  0.3789891    0.1198469    0.08474453 0.62645381    normal  0.4123714
2      X    0.6497906  0.3789891    0.1198469    0.08474453 0.18364332    normal 0.7193894
3      X    0.6497906  0.3789891    0.1198469    0.08474453 0.83562861    normal 0.3330965
4      X    0.6497906  0.3789891    0.1198469    0.08474453 1.59528080    normal 1.2543846 
5      X    0.6497906  0.3789891    0.1198469    0.08474453 0.32950777    normal  0.7746705
6      X    0.6497906  0.3789891    0.1198469    0.08474453 0.82046838    normal 0.3388420
7      X    0.6497906  0.3789891    0.1198469    0.08474453 0.48742905    normal 0.8345209
8      X    0.6497906  0.3789891    0.1198469    0.08474453 0.73832471    normal 0.9296076
9      X    0.6497906  0.3789891    0.1198469    0.08474453 0.57578135    normal 0.8680055
10     X    0.6497906  0.3789891    0.1198469    0.08474453 0.30538839    normal 0.5340517

How can we achieve that for X, Y, Z?

Preferably using the tidyverse.

Upvotes: 2

Views: 558

Answers (1)

akrun
akrun

Reputation: 887173

After grouping by 'var', call rnorm with 'n' set to n() (i.e. the number of rows in the group), 'mean' set to the first element of 'mod_est_mean' (as it is the same within each group), and 'sd' set to the first element of 'mod_est_sd'.

# Seed the RNG so the simulated column is reproducible.
set.seed(1)
df %>%
  group_by(var) %>%
  # n() is the number of rows in the current group, so rnorm returns one draw
  # per row; the mean and sd are taken from the group's first row.
  mutate(
    new_vec = rnorm(n(), mean = first(mod_est_mean), sd = first(mod_est_sd))
  ) %>%
  # Drop the grouping so later verbs are not silently applied per group.
  ungroup()
#    var mod_est_mean mod_est_sd mod_std_mean mod_std_error       vec dist_name   new_vec
#   <chr>        <dbl>      <dbl>        <dbl>         <dbl>     <dbl>     <chr>     <dbl>
#1      X    0.6497906  0.3789891    0.1198469    0.08474453 0.6264538    normal 0.4123715
#2      X    0.6497906  0.3789891    0.1198469    0.08474453 0.1836433    normal 0.7193894
#3      X    0.6497906  0.3789891    0.1198469    0.08474453 0.8356286    normal 0.3330965
#4      X    0.6497906  0.3789891    0.1198469    0.08474453 1.5952808    normal 1.2543846
#5      X    0.6497906  0.3789891    0.1198469    0.08474453 0.3295078    normal 0.7746705
#6      X    0.6497906  0.3789891    0.1198469    0.08474453 0.8204684    normal 0.3388421
#7      X    0.6497906  0.3789891    0.1198469    0.08474453 0.4874291    normal 0.8345209
#8      X    0.6497906  0.3789891    0.1198469    0.08474453 0.7383247    normal 0.9296076
#9      X    0.6497906  0.3789891    0.1198469    0.08474453 0.5757814    normal 0.8680055
#10     X    0.6497906  0.3789891    0.1198469    0.08474453 0.3053884    normal 0.5340518
# ... with 20 more rows

Upvotes: 1

Related Questions