MNU
MNU

Reputation: 764

Swapping the values in columns with the condition of another column in R

For the following data, if the p.2 value is greater than 0.5, I would like to swap the values of column 1 (b.1.1) and column 2 (b.1.2), column 3 (b.2.1) and column 4 (b.2.2), and column 5 (b.3.1) and column 6 (b.3.2).

 mydata
            b.1.1       b.1.2      b.2.1      b.2.2     b.3.1      b.3.2        p.1       p.2
    1  0.40772028  0.43064446  0.2697412  0.9191535 0.1523922  0.7629324 0.86061981 0.1393802
    2 -0.77459375  0.79860856 -0.5263932 -0.5640303 0.5131236  0.6472614 0.63494425 0.3650557
    3 -0.06088828  0.42685669 -1.0643744  0.8330836 0.1184059  0.6661079 0.07382585 0.9261742
    4  1.54204242 -0.08987067 -0.7365012  0.3762336 0.3781115 -0.7340340 0.65481949 0.3451805
    5 -0.73397310  1.34927693  0.2202689  0.2422944 1.5267535 -0.5207967 0.54425551 0.4557445

For example, in the first row, looking at p.1 and p.2, p.2 is not greater than 0.5, so I am not swapping any values in this row. In the third row, p.2 is greater than 0.5, so I would like to swap the values within each beta pair listed above. Any help is appreciated.

Upvotes: 1

Views: 1348

Answers (2)

mikeHoncho
mikeHoncho

Reputation: 317

Another option using just base without regex would be

#' Swap paired value columns on rows whose eighth column exceeds 0.5
#'
#' For every row of `x` where column 8 is greater than 0.5, the values in
#' columns 1 and 2, 3 and 4, and 5 and 6 are exchanged. Column names and
#' column order are never changed; only the cell values of the qualifying
#' rows move. Rows where column 8 is `NA` or not greater than 0.5 are
#' returned as they came in.
#'
#' @param x A data frame with at least eight columns: six value columns
#'   arranged as three adjacent pairs, followed by two further columns, the
#'   last of which (column 8) is compared against 0.5.
#' @param nullFrame Object the result is row-bound onto. `NULL` (the
#'   default) means nothing is prepended.
#' @return `rbind(nullFrame, x)`, with the swaps applied to `x`.
reorderRows <- function(x, nullFrame = NULL) {
  stopifnot(is.data.frame(x), ncol(x) >= 8L)

  # which() turns the logical test into row positions and silently drops
  # NA comparisons, so rows with a missing column-8 value are left alone.
  swap_rows <- which(x[[8]] > 0.5)

  # Replacement into a data frame is positional, so writing columns
  # 2, 1, 4, 3, 6, 5 into positions 1:6 exchanges each pair in place.
  if (length(swap_rows) > 0L) {
    x[swap_rows, 1:6] <- x[swap_rows, c(2, 1, 4, 3, 6, 5)]
  }

  rbind(nullFrame, x)
}

Applied:

# Simulate a 10 x 8 data frame of standard-normal draws and label the
# columns in one step: three value pairs followed by p.1 and p.2.
dat <- setNames(
  data.frame(matrix(rnorm(80), ncol = 8)),
  c("b.1.1", "b.1.2", "b.2.1", "b.2.2", "b.3.1", "b.3.2", "p.1", "p.2")
)

# There is nothing to prepend to the result, so the accumulator is NULL.
emptyFrame <- NULL
dat2 <- reorderRows(dat, emptyFrame)

It is probably a lot slower than the previous answer, but for a small dataset it may be easier to modify.

Upvotes: 2

akrun
akrun

Reputation: 887118

Here is an option. Create a logical index for the columns whose names start with 'b' ('i1') and another for the rows where p.2 is greater than 0.5 ('i2'). After subsetting the data with both indexes, split the columns into a list of data.frames, grouping together the columns whose names are the same once the trailing digit is removed (e.g. b.1.1 and b.1.2). Loop through the list and reverse the column order of each element, cbind the list of data.frames back together, and assign the result to the same rows/columns of the original dataset.

# Column mask: TRUE for the value columns, whose names begin with "b".
i1 <- startsWith(names(mydata), "b")
# Row mask: TRUE where p.2 is above the 0.5 threshold.
i2 <- mydata$p.2 > 0.5

# Group key per value column: the name with its trailing ".<digits>"
# removed, so "b.1.1" and "b.1.2" both map to "b.1".
pair_key <- sub("\\.\\d+$", "", names(mydata)[i1])

# One small data frame per pair, restricted to the rows being swapped.
pair_list <- split.default(mydata[i2, i1, drop = FALSE], pair_key)

# Reverse the column order inside each pair, bind the pairs back side by
# side, and write the result over the same rows/columns (positionally).
flipped <- lapply(pair_list, rev)
mydata[i2, i1] <- do.call(cbind, flipped)
mydata
#       b.1.1       b.1.2      b.2.1      b.2.2     b.3.1      b.3.2        p.1       p.2
#1  0.4077203  0.43064446  0.2697412  0.9191535 0.1523922  0.7629324 0.86061981 0.1393802
#2 -0.7745937  0.79860856 -0.5263932 -0.5640303 0.5131236  0.6472614 0.63494425 0.3650557
#3  0.4268567 -0.06088828  0.8330836 -1.0643744 0.6661079  0.1184059 0.07382585 0.9261742
#4  1.5420424 -0.08987067 -0.7365012  0.3762336 0.3781115 -0.7340340 0.65481949 0.3451805
#5 -0.7339731  1.34927693  0.2202689  0.2422944 1.5267535 -0.5207967 0.54425551 0.4557445

Another option is tidyverse, where we convert into 'long' format, do the transformation in that form, and reshape back to 'wide' format

library(dplyr)
library(tidyr)
library(stringr)
library(tibble)
# Step 1: keep the row id as a column and stack the six "b" columns into
# name/value pairs, carrying p.1 and p.2 along on every row.
long_form <- mydata %>%
  rownames_to_column("rn") %>%
  pivot_longer(cols = -c(rn, p.1, p.2))

# Step 2: within each original row and each pair (column name minus its
# trailing ".<digits>"), reverse the two values when p.2 is above 0.5.
swapped <- long_form %>%
  group_by(rn, grp = str_remove(name, "\\.\\d+$")) %>%
  mutate(value = case_when(p.2 > 0.5 ~ rev(value), TRUE ~ value)) %>%
  ungroup() %>%
  select(-grp)

# Step 3: spread back to one column per "b" name and restore the original
# column order (this also drops the helper "rn" column).
swapped %>%
  pivot_wider(names_from = name, values_from = value) %>%
  select(names(mydata))
# A tibble: 5 x 8
#   b.1.1   b.1.2  b.2.1  b.2.2 b.3.1  b.3.2    p.1   p.2
#   <dbl>   <dbl>  <dbl>  <dbl> <dbl>  <dbl>  <dbl> <dbl>
#1  0.408  0.431   0.270  0.919 0.152  0.763 0.861  0.139
#2 -0.775  0.799  -0.526 -0.564 0.513  0.647 0.635  0.365
#3  0.427 -0.0609  0.833 -1.06  0.666  0.118 0.0738 0.926
#4  1.54  -0.0899 -0.737  0.376 0.378 -0.734 0.655  0.345
#5 -0.734  1.35    0.220  0.242 1.53  -0.521 0.544  0.456

data

# Example data from the question: three pairs of "b" columns plus p.1 and
# p.2, five rows, with character row names "1" to "5".
mydata <- data.frame(
  b.1.1 = c(0.40772028, -0.77459375, -0.06088828, 1.54204242, -0.7339731),
  b.1.2 = c(0.43064446, 0.79860856, 0.42685669, -0.08987067, 1.34927693),
  b.2.1 = c(0.2697412, -0.5263932, -1.0643744, -0.7365012, 0.2202689),
  b.2.2 = c(0.9191535, -0.5640303, 0.8330836, 0.3762336, 0.2422944),
  b.3.1 = c(0.1523922, 0.5131236, 0.1184059, 0.3781115, 1.5267535),
  b.3.2 = c(0.7629324, 0.6472614, 0.6661079, -0.734034, -0.5207967),
  p.1 = c(0.86061981, 0.63494425, 0.07382585, 0.65481949, 0.54425551),
  p.2 = c(0.1393802, 0.3650557, 0.9261742, 0.3451805, 0.4557445),
  row.names = c("1", "2", "3", "4", "5")
)

Upvotes: 1

Related Questions