JuanJMV
JuanJMV

Reputation: 145

How to order variables in blocks

I have a large data set with variables like this:

df <- data.frame(ID=c(1,2),
             Hei3ght1=c(180,192),
             Weight1=c(70,90),
             Hip1=c(25,29),
             hei5ght1=c(160,150),
             Hei3ght2=c(167,168),
             Weight2=c(50,50),
             Hip2=c(23,27),
             hei5ght2=c(160,150),
             Hei3ght3=c(175,176),
             Weight3=c(50,70),
             Hip3=c(28,28),
             hei5ght3=c(160,150))

I would like to order the variables as follows:

ID, Hei3ght1, Hei3ght2, Hei3ght3, Weight1, Weight2, Weight3, Hip1, Hip2, Hip3, Hei5ght1, Hei5ght2, Hei5ght3

I have tried with:

df <- df[sort(names(df))]

But I do not want all the variables alphabetically.

Thank you so much in advance.

UODATE 2

df <- data.frame(ID=c(1,2),
                 Hei3ght1=c(180,192),
                 Weight1=c(70,90),
                 Hip1=c(25,29),
                 hei5ght1=c(160,150),
                 hei5ght21=c(160,150),
                 Hei3ght2=c(167,168),
                 Weight2=c(50,50),
                 Hip2=c(23,27),
                 hei5ght2=c(160,150),
                 hei5ght22=c(160,150),
                 Hei3ght3=c(175,176),
                 Weight3=c(50,70),
                 Hip3=c(28,28),
                 hei5ght3=c(160,150),
                 hei5ght23=c(160,150))

Upvotes: 2

Views: 77

Answers (2)

akrun
akrun

Reputation: 887098

An option in base R would be to convert the column names to a matrix and then to a vector:

n <- length(unique(sub("_\\d+", "", names(df)[-1])))
df[c('ID', c(matrix(names(df)[-1], ncol = n, byrow = TRUE)))]

Output:

ID Height_1 Height_2 Height_3 Weight_1 Weight_2 Weight_3 Hip_1 Hip_2 Hip_3
1  1      180      167      175       70       50       50    25    23    28
2  2      192      168      176       90       50       70    29    27    28

Or you may use

library(data.table)
library(dplyr)
df %>%
   select(ID, order(rowid(readr::parse_number(names(.)[-1])))+1)

Output:

 ID Height_1 Height_2 Height_3 Weight_1 Weight_2 Weight_3 Hip_1 Hip_2 Hip_3
1  1      180      167      175       70       50       50    25    23    28
2  2      192      168      176       90       50       70    29    27    28

Update

For the updated data

library(stringr)
df %>% 
   select(ID, order(rowid(str_extract(names(.)[-1], "\\d+$")))+1)

Output:

  ID Hei3ght1 Hei3ght2 Hei3ght3 Weight1 Weight2 Weight3 Hip1 Hip2 Hip3 hei5ght1 hei5ght2 hei5ght3
1  1      180      167      175      70      50      50   25   23   28      160      160      160
2  2      192      168      176      90      50      70   29   27   28      150      150      150

Update2

df %>% 
    select(ID, order(rowid(str_extract(names(.)[-1], "\\d$")))+1)
  ID Hei3ght1 Hei3ght2 Hei3ght3 Weight1 Weight2 Weight3 Hip1 Hip2 Hip3 hei5ght1 hei5ght2 hei5ght3 hei5ght21 hei5ght22 hei5ght23
1  1      180      167      175      70      50      50   25   23   28      160      160      160       160       160       160
2  2      192      168      176      90      50      70   29   27   28      150      150      150       150       150       150

Upvotes: 3

TarJae
TarJae

Reputation: 78927

This is the long version of @akrun's solution, the core idea is to make pivot longer, transform to factor, and the arrange and pivot back:

library(tidyverse)

df %>% 
  pivot_longer(-ID) %>% 
  mutate(helper = str_replace_all(name, "[[:punct:]][0-9]+", ""),
         helper = factor(helper, levels = c("Height", "Weight", "Hip"))) %>% 
  group_by(ID) %>% 
  arrange(helper, .by_group = TRUE) %>% 
  select(-helper) %>% 
  pivot_wider(names_from = name, values_from = value)
     ID Height_1 Height_2 Height_3 Weight_1 Weight_2 Weight_3 Hip_1 Hip_2 Hip_3
  <dbl>    <dbl>    <dbl>    <dbl>    <dbl>    <dbl>    <dbl> <dbl> <dbl> <dbl>
1     1      180      167      175       70       50       50    25    23    28
2     2      192      168      176       90       50       70    29    27    28

Upvotes: 1

Related Questions