chippycentra
chippycentra

Reputation: 3432

Fill a dataframe according to another with binary states in R

I need help filling df1 according to the information in df2, in a binary (presence/absence) manner.

Here is the df1 :

  Species
1       A_1
2       B_1
3       C_1
4       D_1
5       E_1
6       F_1
7       G_1
8       H_1

the df2:

Species Groups
A_1,B_1,C_1   G1 
B_1,H_1     G2 
D_1       G3 
E_1,F_1     G4 

For each row of df2, I need to add a column to df1 named after that row's group, indicating for each Species in df1 whether it is listed in that row's Species (0 = not present, 1 = present).

The expected output would be:

Species  G1 G2 G3 G4
A_1        1  0  0  0
B_1        1  1  0  0
C_1        1  0  0  0
D_1        0  0  1  0
E_1        0  0  0  1
F_1        0  0  0  1
G_1        0  0  0  0
H_1        0  1  0  0

Here are the data:

df1

# dput() of df1: a data frame with 8 rows and a single factor column `Species`
# whose values are "A_1" through "H_1", one per row.
structure(list(Species = structure(1:8, .Label = c("A_1", "B_1", 
"C_1", "D_1", "E_1", "F_1", "G_1", "H_1"), class = "factor")), class = "data.frame", row.names = c(NA, 
-8L))

df2

# dput() of df2: a data frame with 4 rows and two factor columns.
# `Species` holds a comma-separated list of species names per row (no spaces);
# `Groups` holds the group label ("G1" to "G4") for that row.
structure(list(Species = structure(1:4, .Label = c("A_1,B_1,C_1", "B_1,H_1", 
"D_1", "E_1,F_1"), class = "factor"), Groups = structure(1:4, .Label = c("G1", 
"G2", "G3", "G4"), class = "factor")), class = "data.frame", row.names = c(NA, 
-4L))

Thanks a lot for your help

Upvotes: 1

Views: 41

Answers (3)

ThomasIsCoding
ThomasIsCoding

Reputation: 102241

Perhaps outer with grepl could help

# Build a species-by-group 0/1 membership matrix and bind it to df1.
#
# Both the species names and the comma-separated lists are wrapped in commas so
# that grepl() only matches whole names: a plain grepl("A_1", "A_10,B_1") would
# report a false positive. fixed = TRUE makes the match literal, so characters
# such as "." in a species name are not treated as regex metacharacters.
is_member <- Vectorize(
  function(name, name_list) grepl(name, name_list, fixed = TRUE),
  USE.NAMES = FALSE
)
membership <- outer(
  paste0(",", df1$Species, ","),
  paste0(",", df2$Species, ","),
  is_member
)
# Unary + turns the logical matrix into 0/1 integers; columns are named after
# the groups of df2.
cbind(df1, `colnames<-`(+membership, as.character(df2$Groups)))

which gives

  Species G1 G2 G3 G4
1     A_1  1  0  0  0
2     B_1  1  1  0  0
3     C_1  1  0  0  0
4     D_1  0  0  1  0
5     E_1  0  0  0  1
6     F_1  0  0  0  1
7     G_1  0  0  0  0
8     H_1  0  1  0  0

Upvotes: 1

Karthik S
Karthik S

Reputation: 11584

Does this work:

library(dplyr)
library(tidyr)
# Expand df2 to one row per (species, group) pair, spread the groups into 0/1
# columns, then join back onto df1 so every species of df1 appears in the result.
df2 %>%
  # Split on "," only: the default separator of separate_rows() also splits on
  # "_", which would break names such as "A_1" into "A" and "1".
  separate_rows(Species, sep = ",") %>%
  mutate(Val = 1) %>%
  type.convert(as.is = TRUE) %>%
  pivot_wider(names_from = Groups, values_from = Val, values_fill = 0) %>%
  # right_join keeps species of df1 that belong to no group at all.
  right_join(df1) %>%
  # Those unmatched species have NA in every group column after the join;
  # selecting with -Species (instead of positions 2:5) works for any number
  # of groups.
  mutate(across(-Species, ~ replace_na(., 0))) %>%
  arrange(Species)
Joining, by = "Species"
# A tibble: 8 x 5
  Species    G1    G2    G3    G4
  <chr>   <dbl> <dbl> <dbl> <dbl>
1 A_1         1     0     0     0
2 B_1         1     1     0     0
3 C_1         1     0     0     0
4 D_1         0     0     1     0
5 E_1         0     0     0     1
6 F_1         0     0     0     1
7 G_1         0     0     0     0
8 H_1         0     1     0     0

Upvotes: 2

jay.sf
jay.sf

Reputation: 73262

Using grepl.

# Species-by-group 0/1 membership matrix (rows follow df1, columns follow df2).
#
# Names and lists are wrapped in commas and matched literally (fixed = TRUE) so
# that only whole species names count: an unanchored grepl("A_1", "A_10,B_1")
# would be a false positive, and "." in a name would act as a regex wildcard.
species_lists <- paste0(",", df2$Species, ",")
r <- +t(vapply(
  paste0(",", df1$Species, ","),
  function(name) grepl(name, species_lists, fixed = TRUE),
  logical(nrow(df2)),  # one TRUE/FALSE per df2 row for each species
  USE.NAMES = FALSE    # keep the matrix free of dimnames
))
r
#      [,1] [,2] [,3] [,4]
# [1,]    1    0    0    0
# [2,]    1    1    0    0
# [3,]    1    0    0    0
# [4,]    0    0    1    0
# [5,]    0    0    0    1
# [6,]    0    0    0    1
# [7,]    0    0    0    0
# [8,]    0    1    0    0

And to get the data frame:

# Assemble the final data frame: the Species column from df1 followed by the
# membership matrix r, with its columns labelled by the groups of df2.
d <- data.frame(
  Species = df1$Species,
  matrix(r, nrow = nrow(r), dimnames = list(NULL, as.character(df2$Groups))),
  check.names = FALSE
)
d
#   Species G1 G2 G3 G4
# 1     A_1  1  0  0  0
# 2     B_1  1  1  0  0
# 3     C_1  1  0  0  0
# 4     D_1  0  0  1  0
# 5     E_1  0  0  0  1
# 6     F_1  0  0  0  1
# 7     G_1  0  0  0  0
# 8     H_1  0  1  0  0

Upvotes: 1

Related Questions