AK88
AK88

Reputation: 3026

Find max of rows from specific columns and extract column name and corresponding row value from another column

Here is a data structure that I have:

structure(list(UDD_beta = c(1.17136554204268, 0.939587997289016
), UDD_pval = c(0, 0), UDD_R.sq = c(0.749044972637797, 0.516943886705951
), SSX_beta = c(1.05356804780772, 0.927948300464624), SSX_pval = c(0, 
0), SSX_R.sq = c(0.60226298037862, 0.629111666509209), SPP_beta = c(0.675765151939885, 
0.516425218613404), SPP_pval = c(0, 0), SPP_R.sq = c(0.479849538274406, 
0.378266618442121), EEE_beta = c(0.690521022226874, 0.639380962824289
), EEE_pval = c(0, 0), EEE_R.sq = c(0.585610742768951, 0.676073352909597
)), .Names = c("UDD_beta", "UDD_pval", "UDD_R.sq", "SSX_beta", 
"SSX_pval", "SSX_R.sq", "SPP_beta", "SPP_pval", "SPP_R.sq", 
"EEE_beta", "EEE_pval", "EEE_R.sq"), row.names = c("DDK", "DDL"
), class = "data.frame")

I want to take R.sq columns and for each row find the max and the column name of the max value. Then take corresponding beta. Expected output:

    Name Value
DDK UDD 1.17136554204268
DDL EEE 0.690521022226874

Sorry, the second expected value should be 0.639380962824289.

Upvotes: 1

Views: 836

Answers (4)

akrun
akrun

Reputation: 887118

We could use max.col. Subset the columns of interest i.e. columns that have 'R.sq' using the grep, then get the column index of max value with max.col. Use that to get the column names and also the values that correspond to a particular row (row/column indexing)

i1 <- grep("R.sq", names(df1))
i2 <- max.col(df1[i1], "first")
i3 <- grep("beta", names(df1))
res <- data.frame(Names = sub("_.*", "", names(df1)[i1][i2]), 
               Value = df1[i3][cbind(1:nrow(df1), i2)])
row.names(res) <- row.names(df1)

Upvotes: 2

Sotos
Sotos

Reputation: 51592

You can use a tidyverse approach via gathering your df to long and filtering both R.sq vars and max value, i.e.

library(tidyverse)

df %>% 
 rownames_to_column('ID') %>% 
 gather(var, val, -ID) %>% 
 filter(grepl('R.sq|beta', var)) %>% 
 group_by(ID) %>% 
 mutate(max1=as.integer(val == max(val[grepl('R.sq', var)]))) %>% 
 group_by(ID, grp = sub('_.*', '', var)) %>% 
 filter(!all(max1 == 0) & grepl('beta', var)) %>% 
 ungroup() %>% select(-c(max1, grp))

which gives,

# A tibble: 2 x 3
     ID      var      val
  <chr>    <chr>    <dbl>
1   DDK UDD_beta 1.171366
2   DDL EEE_beta 0.639381

Upvotes: 2

Patrik_P
Patrik_P

Reputation: 3200

sub_data <- data[grep("R.sq", colnames(data))]
colnames(sub_data) <- gsub("_R.sq", "", colnames(sub_data))
sub_data$Name <- NA
sub_data$Value <- NA
for (i in 1:nrow(sub_data)){
  sub_data$Name[i] <- names(sub_data[i,])[which.max(apply(sub_data[i,], 2, max))]
  sub_data$Value[i] <- max(data[grep(paste0(sub_data$Name[i], "_beta"), colnames(data))], na.rm=T)
}
sub_data[c("Name", "Value")]
#    Name    Value
#DDK  UDD 1.171366
#DDL  EEE 0.690521

Upvotes: 2

pogibas
pogibas

Reputation: 28339

# Need ID for all possible betas and Rsq
ID <- gsub("_R.sq", "", grep("_R.sq$", names(INPUT), value = TRUE))
dummy <- function(x) {
    # Find out which Rsq is largest
    i <- ID[which.max(x[paste0(ID, "_R.sq")])]
    # Extract beta for largest Rsq
    data.frame(Name = i, Value = x[paste0(i, "_beta")])
}
do.call("rbind", apply(INPUT, 1, dummy))

Upvotes: 1

Related Questions