Anon
Anon

Reputation: 1547

R: Filter a dataframe based on another dataframe

I want to subset the data frame e based on the data frame pf. The two data frames share the same row names. My code adds a "row_names" column, which I don't want.

library(dplyr)

# Attempt to subset `e` by comparing its row names with those of `pf`.
# `!` binds more loosely than `%in%`, so the mask is TRUE where a row name of
# `e` is NOT found among the row names of `pf`.
# NOTE(review): the mask is placed after the comma, i.e. in the column
# position of `[`, although it is built from row names -- confirm whether
# row filtering (`e[mask, ]`) was intended.
e <- e[, !row.names(e) %in% row.names(pf)]


> dput(e[1:10, 1:10])
structure(list(row_names = c("2315374", "2315376", "2315587", 
"2315588", "2315591", "2315595", "2315598", "2315603", "2315604", 
"2315640"), JHU_113_2.CEL = c(6.28274, 5.81678, 8.88557, 6.2865, 
6.97515, 5.94179, 4.9942, 7.67845, 6.20952, 5.85307), JHU_144.CEL = c(6.79161, 
5.71165, 8.95699, 6.6675, 6.61705, 5.39178, 5.56888, 7.90005, 
6.59687, 6.07303), JHU_173.CEL = c(6.11265, 6.02794, 8.36898, 
6.07503, 6.51994, 5.09497, 5.57912, 7.47594, 6.14608, 6.41875
), JHU_176R.CEL = c(6.13997, 5.37082, 8.28993, 6.76625, 6.74982, 
4.96199, 5.4396, 6.75087, 5.70518, 6.07282), JHU_182.CEL = c(6.68056, 
5.95527, 8.41361, 6.19819, 6.60917, 2.96431, 5.19249, 7.62805, 
6.49572, 6.28283), JHU_186.CEL = c(6.48156, 5.75999, 8.6498, 
6.8426, 6.55182, 4.95204, 5.87991, 8.00069, 6.12622, 6.13699), 
    JHU_187.CEL = c(6.45415, 5.87863, 8.74305, 6.13916, 6.6224, 
    5.00979, 5.6054, 7.34296, 6.2369, 6.16377), JHU_188.CEL = c(6.04542, 
    5.5483, 8.31915, 6.40219, 6.44394, 4.06493, 5.09513, 6.81338, 
    6.39569, 6.48616), JHU_203.CEL = c(5.99176, 6.35571, 8.43548, 
    6.45059, 5.76592, 5.38048, 5.43618, 7.52014, 6.70869, 6.34162
    )), row.names = c(NA, 10L), class = "data.frame")

> dput(pf[1:10, 1:10])
structure(list(JHU_113_2.CEL = c(0.0073, 0.04079, 0.02234, 0.08252, 
0.00127, 0.00256, 0.04265, 0.02244, 0.02615, 0.04563), JHU_144.CEL = c(0.00293, 
0.01525, 0.00914, 0.03644, 0.00162, 0.01274, 0.00798, 0.00955, 
0.00732, 0.02698), JHU_173.CEL = c(0.03034, 0.0309, 0.05024, 
0.09374, 0.00548, 0.04754, 0.03491, 0.03399, 0.02661, 0.01546
), JHU_176R.CEL = c(0.02571, 0.08493, 0.06918, 0.0337, 0.00945, 
0.03185, 0.03182, 0.15281, 0.04117, 0.03758), JHU_182.CEL = c(0.00436, 
0.01303, 0.0521, 0.04909, 0.00341, 0.45759, 0.02033, 0.01911, 
0.01037, 0.01974), JHU_186.CEL = c(0.00788, 0.03559, 0.02822, 
0.02419, 0.00383, 0.02401, 0.00727, 0.0126, 0.01663, 0.02291), 
    JHU_187.CEL = c(0.0052, 0.02163, 0.02401, 0.07845, 0.0028, 
    0.01582, 0.008, 0.0432, 0.01312, 0.03307), JHU_188.CEL = c(0.01704, 
    0.01586, 0.06648, 0.09027, 0.00882, 0.21313, 0.03859, 0.1424, 
    0.01815, 0.01586), JHU_203.CEL = c(0.03273, 0.04264, 0.06823, 
    0.09449, 0.06553, 0.05349, 0.06481, 0.07518, 0.02777, 0.07511
    ), JHU_205.CEL = c(0.0672, 0.05689, 0.1278, 0.12526, 0.03327, 
    0.10091, 0.42112, 0.18431, 0.08595, 0.01157)), row.names = c(2315374L, 
2315376L, 2315587L, 2315588L, 2315591L, 2315595L, 2315598L, 2315603L, 
2315604L, 2315640L), class = "data.frame")

Upvotes: 0

Views: 4396

Answers (1)

AndrewGB
AndrewGB

Reputation: 16866

If you only want to keep the rows of e whose row names occur in pf (or, to keep the rows that don't occur, negate the condition with !rownames(e) %in% rownames(pf)), then you can just filter on the row names:

library(tidyverse)

# Keep the rows of `e` whose row names are also row names of `pf`.
filter(e, rownames(e) %in% rownames(pf))

Another possibility is to create a row-names column (here called rn) in both data frames. We can then do a semi_join on rn, which keeps only the rows of e that have a match in pf, and finally convert the rn column back to row names.

library(tidyverse)

# Keep only the rows of `e` whose row name also appears in `pf`.
list(e, pf) %>%
  # Move the row names of each data frame into an explicit key column `rn`.
  map(~ .x %>%
        as.data.frame() %>%
        rownames_to_column("rn")) %>%
  # semi_join() returns the rows (and only the columns) of `e` that have a
  # matching `rn` in `pf`. full_join() would instead append pf's columns and
  # keep unmatched rows from both data frames.
  reduce(semi_join, by = "rn") %>%
  # Restore the key column as row names.
  column_to_rownames("rn")

Output

        JHU_113_2.CEL JHU_144.CEL JHU_173.CEL JHU_176R.CEL JHU_182.CEL JHU_186.CEL JHU_187.CEL JHU_188.CEL JHU_203.CEL
2315374       6.28274     6.79161     6.11265      6.13997     6.68056     6.48156     6.45415     6.04542     5.99176
2315376       5.81678     5.71165     6.02794      5.37082     5.95527     5.75999     5.87863     5.54830     6.35571
2315587       8.88557     8.95699     8.36898      8.28993     8.41361     8.64980     8.74305     8.31915     8.43548
2315588       6.28650     6.66750     6.07503      6.76625     6.19819     6.84260     6.13916     6.40219     6.45059
2315591       6.97515     6.61705     6.51994      6.74982     6.60917     6.55182     6.62240     6.44394     5.76592
2315595       5.94179     5.39178     5.09497      4.96199     2.96431     4.95204     5.00979     4.06493     5.38048
2315598       4.99420     5.56888     5.57912      5.43960     5.19249     5.87991     5.60540     5.09513     5.43618
2315603       7.67845     7.90005     7.47594      6.75087     7.62805     8.00069     7.34296     6.81338     7.52014
2315604       6.20952     6.59687     6.14608      5.70518     6.49572     6.12622     6.23690     6.39569     6.70869
2315640       5.85307     6.07303     6.41875      6.07282     6.28283     6.13699     6.16377     6.48616     6.34162

Upvotes: 2

Related Questions