Reputation: 968
This is my sample dataframe
df <- dput(head(Only_mesh,40))
structure(list(PMID = c(1617318, 1691132, 1691132, 1691132, 1691132,
1691132, 1691134, 1691134, 1691134, 1691134, 1691134, 1691134,
1828453, 1828453, 1861532, 1861532, 2031969, 2031969, 2031969,
2182418, 3162515, 3162515, 3305224, 3305224, 3305224, 3305224,
3520799, 6386072, 6803909, 6803909, 6803909, 7627720, 8193884,
8193884, 9554461, 9766494, 9766494, 9853980, 9853980, 9853980
), Term = c("Cyclophosphamide", "Daunorubicin", "Cytosine arabinoside",
"Vincristine", "AMSA", "Azacytidine", "Ara-C", "Amsacrine", "-C",
"5-azacytidine", "AZA", "Daunorubicin", "Mithramycin", "Hydroxyurea",
"Retinol", "Retinol palmitate", "Cytosine arabinoside", "Daunorubicin",
"Amsacrine", "Cytosine arabinoside", "Cytarabine", "Anthracyclines",
"Cytosine arabinoside", "Daunorubicin", "Thioguanine", "Cyclophosphamide",
"Rifampicin", "CR", "Doxorubicin", "Cytarabine", "6-thioguanine",
"Bilirubin", "Fluconazol", "Ofloxacin", "Taxol", "Aclarubicin",
"Etoposide", "Mitoxantrone", "Cytarabine", "Etoposide"), Class = c("Chemical",
"Chemical", "Chemical", "Chemical", "Chemical", "Chemical", "Chemical",
"Chemical", "Chemical", "Chemical", "Chemical", "Chemical", "Chemical",
"Chemical", "Chemical", "Chemical", "Chemical", "Chemical", "Chemical",
"Chemical", "Chemical", "Chemical", "Chemical", "Chemical", "Chemical",
"Chemical", "Chemical", "Chemical", "Chemical", "Chemical", "Chemical",
"Chemical", "Chemical", "Chemical", "Chemical", "Chemical", "Chemical",
"Chemical", "Chemical", "Chemical"), MESH = c("MESH:D003520",
"MESH:D003630", "MESH:D003561", "MESH:D014750", "MESH:D000677",
"MESH:D001374", "MESH:D003561", "MESH:D000677", "MESH:D002244",
"MESH:D001374", "MESH:D001379", "MESH:D003630", "MESH:D008926",
"MESH:D006918", "MESH:D014801", "MESH:C014794", "MESH:D003561",
"MESH:D003630", "MESH:D000677", "MESH:D003561", "MESH:D003561",
"MESH:D018943", "MESH:D003561", "MESH:D003630", "MESH:D013866",
"MESH:D003520", "MESH:D012293", "MESH:D002857", "MESH:D004317",
"MESH:D003561", "MESH:D013866", "MESH:D001663", "MESH:D015725",
"MESH:D015242", "MESH:D017239", "MESH:D015250", "MESH:D005047",
"MESH:D008942", "MESH:D003561", "MESH:D005047")), row.names = c(2L,
3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 15L, 16L, 17L,
18L, 19L, 20L, 21L, 22L, 23L, 25L, 26L, 27L, 28L, 29L, 30L, 31L,
32L, 33L, 34L, 35L, 36L, 37L, 38L, 39L, 40L, 41L, 42L, 43L), class = "data.frame")
My goal is to find out PMID which has single MESH term , then more than one MESH term such as double or triple MESH on multiple PMID.
For example for single MESH term I filter using this.
MESH_D003561 = filter(Only_mesh, MESH %in% c("MESH:D003561"))
MESH_D003630 = filter(Only_mesh, MESH %in% c("MESH:D003630"))
The if i wanted to see how many PMID has both the above MESH term such as 'MESH:D003561' and 'MESH:D003630'
I tried this
x <- dplyr::filter(Only_mesh, MESH == "MESH:D003561" & MESH == "MESH:D003630")
I see no output what i understand is that to use 'AND' condition I need to use two diffrent column in a dataframe.
One of the long way is I filter each MESH term individually and then run intersect to find out PMID which contain both the MESH term.
But I would be glad if any suggestion or help how can i subset or filter based from the single column condition using "AND" as my operator.
Upvotes: 1
Views: 53
Reputation: 173803
You need to group_by(PMID)
first, then filter according to whether both MESH terms are present within that PMID:
library(dplyr)
Only_mesh %>%
group_by(PMID) %>%
filter(any(MESH == "MESH:D003561") & any(MESH == "MESH:D003630")) %>%
filter(MESH == "MESH:D003561" | MESH == "MESH:D003630")
#> # A tibble: 8 x 4
#> # Groups: PMID [4]
#> PMID Term Class MESH
#> <dbl> <chr> <chr> <chr>
#> 1 1691132 Daunorubicin Chemical MESH:D003630
#> 2 1691132 Cytosine arabinoside Chemical MESH:D003561
#> 3 1691134 Ara-C Chemical MESH:D003561
#> 4 1691134 Daunorubicin Chemical MESH:D003630
#> 5 2031969 Cytosine arabinoside Chemical MESH:D003561
#> 6 2031969 Daunorubicin Chemical MESH:D003630
#> 7 3305224 Cytosine arabinoside Chemical MESH:D003561
#> 8 3305224 Daunorubicin Chemical MESH:D003630
Upvotes: 3