Patrick Parts
Patrick Parts

Reputation: 203

Mutate a variable depending on several other variables

I have this following dataframe

   ID FID Mother Father   Sib cousin Aunt uncle   PAT
1  A1   A                                       FALSE
2  A2   A                                       FALSE
3  A3   A     A1     A2    A4                   FALSE
4  A4   A     A1     A2    A3                    TRUE
5  B1   B                                       FALSE
6  B2   B                                       FALSE
7  B3   B     B1     B2 B4;B5     B8   B7    B6  TRUE
8  B4   B     B1     B2 B3;B5                   FALSE
9  B5   B               B3;B4                   FALSE
10 B6   B                                       FALSE
11 B7   B                                       FALSE
12 B8   B                         B3            FALSE
13 C1   C                                       FALSE
14 C2   C                                       FALSE
15 C3   C     C1     C2                          TRUE

I would like to add a column "family_relationship" that describes by FID, the familial relashionship between the ID that have TRUE in PAT and the other members as in the following output table.

   ID FID Mother Father   Sib cousin Aunt uncle   PAT family_relationship
1  A1   A                                       FALSE              Mother
2  A2   A                                       FALSE              Father
3  A3   A     A1     A2    A4                   FALSE                 Sib
4  A4   A     A1     A2    A3                    TRUE                 PAT
5  B1   B                                       FALSE              Mother
6  B2   B                                       FALSE              Father
7  B3   B     B1     B2 B4;B5     B8   B7    B6  TRUE                 PAT
8  B4   B     B1     B2 B3;B5                   FALSE                 Sib
9  B5   B               B3;B4                   FALSE                 sib
10 B6   B                                       FALSE               Uncle
11 B7   B                                       FALSE                Aunt
12 B8   B                         B3            FALSE              Cousin
13 C1   C                                       FALSE              Mother
14 C2   C                                       FALSE              Father
15 C3   C     C1     C2                          TRUE                 PAT
ID=c("A1","A2","A3","A4","B1", "B2", "B3", "B4", "B5", "B6", "B7", "B8", "C1", "C2", "C3")
FID=c(rep("A",4), rep("B", 8), rep("C",3))
Mother=c("","","A1", "A1", "","","B1","B1","", "", "","","","","C1")
Father=c("","","A2", "A2", "","","B2","B2","", "", "","","","","C2")
Sib=c("","", "A4", "A3", "", "", "B4;B5","B3;B5", "B3;B4", "", "", "","", "", "" )
cousin=c("","","","","","","B8","","","","","B3", "","","")
Aunt=c("","","","","","","B7","","","","","", "","","")
uncle=c("","","","","","","B6","","","","","", "","","")
PAT=c("FALSE","FALSE","FALSE","TRUE","FALSE","FALSE","TRUE","FALSE","FALSE","FALSE","FALSE","FALSE","FALSE","FALSE","TRUE" )
family_relationship=c("Mother", "Father", "Sib", "PAT", "Mother", "Father", "PAT", "Sib", "sib", "Uncle", "Aunt", "Cousin", "Mother", "Father", "PAT")

Upvotes: 1

Views: 504

Answers (2)

Ben
Ben

Reputation: 30474

Here is an approach using tidyverse.

First, create a look-up table of relationships where PAT is TRUE. Then, merge back with your data and join based on FID and ID.

library(tidyverse)

df %>%
  filter(PAT == TRUE) %>%
  select(-PAT, -ID) %>%
  pivot_longer(cols = -FID, names_to = "family_relationship", values_to = "ID") %>%
  separate_rows(ID, sep = ";") %>%
  right_join(df, by = c("FID", "ID")) %>%
  mutate(family_relationship = replace(family_relationship, 
                                       is.na(family_relationship) & PAT == T, 
                                       "PAT")) %>%
  arrange(FID, ID) %>%
  relocate(family_relationship, .after = PAT)

Output

   FID   ID    Mother Father Sib     Cousin Aunt  Uncle PAT   family_relationship
   <chr> <chr> <chr>  <chr>  <chr>   <chr>  <chr> <chr> <chr> <chr>              
 1 A     A1    ""     ""     ""      ""     ""    ""    FALSE Mother             
 2 A     A2    ""     ""     ""      ""     ""    ""    FALSE Father             
 3 A     A3    "A1"   "A2"   "A4"    ""     ""    ""    FALSE Sib                
 4 A     A4    "A1"   "A2"   "A3"    ""     ""    ""    TRUE  PAT                
 5 B     B1    ""     ""     ""      ""     ""    ""    FALSE Mother             
 6 B     B2    ""     ""     ""      ""     ""    ""    FALSE Father             
 7 B     B3    "B1"   "B2"   "B4;B5" "B8"   "B7"  "B6"  TRUE  PAT                
 8 B     B4    "B1"   "B2"   "B3;B5" ""     ""    ""    FALSE Sib                
 9 B     B5    ""     ""     "B3;B4" ""     ""    ""    FALSE Sib                
10 B     B6    ""     ""     ""      ""     ""    ""    FALSE Uncle              
11 B     B7    ""     ""     ""      ""     ""    ""    FALSE Aunt               
12 B     B8    ""     ""     ""      "B3"   ""    ""    FALSE Cousin             
13 C     C1    ""     ""     ""      ""     ""    ""    FALSE Mother             
14 C     C2    ""     ""     ""      ""     ""    ""    FALSE Father             
15 C     C3    "C1"   "C2"   ""      ""     ""    ""    TRUE  PAT 

Upvotes: 1

elielink
elielink

Reputation: 1202

Ok here is a solution but your instruction are somehow tricky because for instance, B1 and B2 do not appear anywhere in the "family" columns. I therefore took the freedom to assign them NA

df = data.frame(ID,FID,Mother,Father,Sib,cousin,Aunt,uncle,PAT)
a = c()
for(i in 1:nrow(df)){
  if(df[i,"PAT"]== 'TRUE'){
      a=c(a,setNames('PAT',i))
    }
  if(df[i,"PAT"]== 'FALSE'){
    
      for(n in 2:ncol(df)){
        if(length(grep(df[i,"ID"],df[,n]))>0){
          
          a= c(a, setNames(colnames(df)[n],i)) 
        }
       
      }}
    }
  
df$family_relationship=NA
df[which(!1:nrow(df)%in%names(a)),"family_relationship"]=NA
df[as.numeric(names(a)),"family_relationship"]=as.character(a)

the result looks like this:

   ID FID Mother Father   Sib cousin Aunt uncle   PAT family_relationship
1  A1   A                                       FALSE              Mother
2  A2   A                                       FALSE              Father
3  A3   A     A1     A2    A4                   FALSE                 Sib
4  A4   A     A1     A2    A3                    TRUE                 PAT
5  B1   B                                       FALSE                <NA>
6  B2   B                                       FALSE                <NA>
7  B3   B     C1     C2 B4;B5     B8   B7    B6  TRUE                 PAT
8  B4   B     C1     C2 B3;B5                   FALSE                 Sib
9  B5   B               B3;B4                   FALSE                 Sib
10 B6   B                                       FALSE               uncle
11 B7   B                                       FALSE                Aunt
12 B8   B                         B3            FALSE              cousin
13 C1   C                                       FALSE              Mother
14 C2   C                                       FALSE              Father
15 C3   C     C1     C2                          TRUE                 PAT

Let me know if it's ok

Upvotes: 1

Related Questions