Reputation: 203
I have this following dataframe
ID FID Mother Father Sib cousin Aunt uncle PAT
1 A1 A FALSE
2 A2 A FALSE
3 A3 A A1 A2 A4 FALSE
4 A4 A A1 A2 A3 TRUE
5 B1 B FALSE
6 B2 B FALSE
7 B3 B B1 B2 B4;B5 B8 B7 B6 TRUE
8 B4 B B1 B2 B3;B5 FALSE
9 B5 B B3;B4 FALSE
10 B6 B FALSE
11 B7 B FALSE
12 B8 B B3 FALSE
13 C1 C FALSE
14 C2 C FALSE
15 C3 C C1 C2 TRUE
I would like to add a column "family_relationship" that describes by FID, the familial relashionship between the ID that have TRUE in PAT and the other members as in the following output table.
ID FID Mother Father Sib cousin Aunt uncle PAT family_relationship
1 A1 A FALSE Mother
2 A2 A FALSE Father
3 A3 A A1 A2 A4 FALSE Sib
4 A4 A A1 A2 A3 TRUE PAT
5 B1 B FALSE Mother
6 B2 B FALSE Father
7 B3 B B1 B2 B4;B5 B8 B7 B6 TRUE PAT
8 B4 B B1 B2 B3;B5 FALSE Sib
9 B5 B B3;B4 FALSE sib
10 B6 B FALSE Uncle
11 B7 B FALSE Aunt
12 B8 B B3 FALSE Cousin
13 C1 C FALSE Mother
14 C2 C FALSE Father
15 C3 C C1 C2 TRUE PAT
ID=c("A1","A2","A3","A4","B1", "B2", "B3", "B4", "B5", "B6", "B7", "B8", "C1", "C2", "C3")
FID=c(rep("A",4), rep("B", 8), rep("C",3))
Mother=c("","","A1", "A1", "","","B1","B1","", "", "","","","","C1")
Father=c("","","A2", "A2", "","","B2","B2","", "", "","","","","C2")
Sib=c("","", "A4", "A3", "", "", "B4;B5","B3;B5", "B3;B4", "", "", "","", "", "" )
cousin=c("","","","","","","B8","","","","","B3", "","","")
Aunt=c("","","","","","","B7","","","","","", "","","")
uncle=c("","","","","","","B6","","","","","", "","","")
PAT=c("FALSE","FALSE","FALSE","TRUE","FALSE","FALSE","TRUE","FALSE","FALSE","FALSE","FALSE","FALSE","FALSE","FALSE","TRUE" )
family_relationship=c("Mother", "Father", "Sib", "PAT", "Mother", "Father", "PAT", "Sib", "sib", "Uncle", "Aunt", "Cousin", "Mother", "Father", "PAT")
Upvotes: 1
Views: 504
Reputation: 30474
Here is an approach using tidyverse
.
First, create a look-up table of relationships where PAT
is TRUE
. Then, merge back with your data and join based on FID
and ID
.
library(tidyverse)
df %>%
filter(PAT == TRUE) %>%
select(-PAT, -ID) %>%
pivot_longer(cols = -FID, names_to = "family_relationship", values_to = "ID") %>%
separate_rows(ID, sep = ";") %>%
right_join(df, by = c("FID", "ID")) %>%
mutate(family_relationship = replace(family_relationship,
is.na(family_relationship) & PAT == T,
"PAT")) %>%
arrange(FID, ID) %>%
relocate(family_relationship, .after = PAT)
Output
FID ID Mother Father Sib Cousin Aunt Uncle PAT family_relationship
<chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
1 A A1 "" "" "" "" "" "" FALSE Mother
2 A A2 "" "" "" "" "" "" FALSE Father
3 A A3 "A1" "A2" "A4" "" "" "" FALSE Sib
4 A A4 "A1" "A2" "A3" "" "" "" TRUE PAT
5 B B1 "" "" "" "" "" "" FALSE Mother
6 B B2 "" "" "" "" "" "" FALSE Father
7 B B3 "B1" "B2" "B4;B5" "B8" "B7" "B6" TRUE PAT
8 B B4 "B1" "B2" "B3;B5" "" "" "" FALSE Sib
9 B B5 "" "" "B3;B4" "" "" "" FALSE Sib
10 B B6 "" "" "" "" "" "" FALSE Uncle
11 B B7 "" "" "" "" "" "" FALSE Aunt
12 B B8 "" "" "" "B3" "" "" FALSE Cousin
13 C C1 "" "" "" "" "" "" FALSE Mother
14 C C2 "" "" "" "" "" "" FALSE Father
15 C C3 "C1" "C2" "" "" "" "" TRUE PAT
Upvotes: 1
Reputation: 1202
Ok here is a solution but your instruction are somehow tricky because for instance, B1 and B2 do not appear anywhere in the "family" columns. I therefore took the freedom to assign them NA
df = data.frame(ID,FID,Mother,Father,Sib,cousin,Aunt,uncle,PAT)
a = c()
for(i in 1:nrow(df)){
if(df[i,"PAT"]== 'TRUE'){
a=c(a,setNames('PAT',i))
}
if(df[i,"PAT"]== 'FALSE'){
for(n in 2:ncol(df)){
if(length(grep(df[i,"ID"],df[,n]))>0){
a= c(a, setNames(colnames(df)[n],i))
}
}}
}
df$family_relationship=NA
df[which(!1:nrow(df)%in%names(a)),"family_relationship"]=NA
df[as.numeric(names(a)),"family_relationship"]=as.character(a)
the result looks like this:
ID FID Mother Father Sib cousin Aunt uncle PAT family_relationship
1 A1 A FALSE Mother
2 A2 A FALSE Father
3 A3 A A1 A2 A4 FALSE Sib
4 A4 A A1 A2 A3 TRUE PAT
5 B1 B FALSE <NA>
6 B2 B FALSE <NA>
7 B3 B C1 C2 B4;B5 B8 B7 B6 TRUE PAT
8 B4 B C1 C2 B3;B5 FALSE Sib
9 B5 B B3;B4 FALSE Sib
10 B6 B FALSE uncle
11 B7 B FALSE Aunt
12 B8 B B3 FALSE cousin
13 C1 C FALSE Mother
14 C2 C FALSE Father
15 C3 C C1 C2 TRUE PAT
Let me know if it's ok
Upvotes: 1