Austin Overman
Austin Overman

Reputation: 163

How to sample across rows using R

I have a table where each row contains 180 observations of the same measure, each in its own column, like this:

group  category  measure1  measure2  measure3  ...  measure180
    A         z        56        55        57               55
    A         x        54        55        55               54
    B         z        53        56        54               55 
    B         x        58        55        53               57

and so on

How would I randomly sample x measures from each row, without replacement? The order of the sampled values is irrelevant.

So if x = 2 I would get:

group  category  sample1  sample2  
    A         z       55       57              
    A         x       55       54
    B         z       54       56 
    B         x       55       53 

Upvotes: 1

Views: 125

Answers (4)

akrun
akrun

Reputation: 887871

We can also do this in tidyverse with pmap to sample the elements of 'measure' columns from each row of the dataset

library(tidyverse)
n <- 2
nm1 <- str_c("sample", seq_len(n))

# Takes the measure values of a single row (passed as separate arguments),
# draws `n` of them without replacement and returns the draws as a one-row
# tibble whose columns are named after `nm1`.
draw_row <- function(...) {
  picked <- sample(c(...), n, replace = FALSE)
  as_tibble(set_names(as.list(picked), nm1))
}

# pmap() walks the 'measure' columns row by row; each row yields a one-row
# tibble that unnest() then spreads into regular columns.
df %>%
  transmute(
    group,
    category,
    out = pmap(select(., starts_with('measure')), draw_row)
  ) %>%
  unnest(cols = out)
# A tibble: 4 x 4
#  group category sample1 sample2
#  <fct> <fct>      <int>   <int>
#1 A     z             56      55
#2 A     x             54      55
#3 B     z             56      54
#4 B     x             58      53

data

# Example data: two factor id columns followed by four integer measure
# columns, one row per group/category combination.
df <- data.frame(
  group = factor(c("A", "A", "B", "B")),
  category = factor(c("z", "x", "z", "x")),
  measure1 = c(56L, 54L, 53L, 58L),
  measure2 = c(55L, 55L, 56L, 55L),
  measure3 = c(57L, 55L, 54L, 53L),
  measure180 = c(55L, 54L, 55L, 57L)
)

Upvotes: 1

Ronak Shah
Ronak Shah

Reputation: 389245

In base R, we can use apply rowwise to select x values from each row

# Sample `x` measure values from every row, without replacement (base R).
cols <- grep("^measure", names(df))
x <- 2
# sampling without replacement cannot draw more values than a row holds
stopifnot(x >= 1, x <= length(cols))

measures <- as.matrix(df[cols])
# Draw positions with sample.int() rather than calling sample() on the values:
# sample(v, x) treats a length-one numeric `v` as the range 1:v.
picked <- lapply(seq_len(nrow(measures)), function(i) {
  measures[i, sample.int(ncol(measures), x)]
})
# Stacking the draws with rbind() gives an nrow(df)-by-x matrix even when
# x is 1 (apply() would collapse that case to a plain vector).
samples <- do.call(rbind, picked)
colnames(samples) <- paste0("sample", seq_len(x))
# setdiff() keeps the id columns even if no measure column matched, whereas
# df[-cols] with an empty `cols` would drop every column.
cbind(df[setdiff(seq_along(df), cols)], samples)

#  group category sample1 sample2
#1     A        z      56      55
#2     A        x      54      54
#3     B        z      54      53
#4     B        x      55      53

Another option using dplyr and tidyr

library(dplyr)
library(tidyr)

# Reshape to long form, draw `x` measures per original row, then spread the
# draws back out into sample1..sampleN columns.
df %>%
  # remember which original row each value came from
  mutate(row = row_number()) %>%
  pivot_longer(
    starts_with("measure"),
    names_to = "key",
    values_to = "value"
  ) %>%
  group_by(row) %>%
  # on grouped data sample_n() draws `x` rows per group, i.e. per original row
  sample_n(x) %>%
  # relabel the draws; seq_len(x) is also safe where 1:x is not (x = 0)
  mutate(key = paste0("sample", seq_len(x))) %>%
  # pivot_wider() keeps the columns in order of appearance, so sample10 does
  # not end up before sample2 as it would with spread()'s sorted columns
  pivot_wider(names_from = key, values_from = value) %>%
  ungroup() %>%
  select(-row)

data

# Example data used by the snippets above: factor id columns `group` and
# `category`, then four integer measure columns.
df <- data.frame(
  group = factor(rep(c("A", "B"), each = 2L)),
  category = factor(rep(c("z", "x"), times = 2L)),
  measure1 = c(56L, 54L, 53L, 58L),
  measure2 = c(55L, 55L, 56L, 55L),
  measure3 = c(57L, 55L, 54L, 53L),
  measure180 = c(55L, 54L, 55L, 57L)
)

Upvotes: 1

Waschi Waschoi
Waschi Waschoi

Reputation: 183

   library(tidyverse)

   # number of measure columns to draw
   x <- 2

   # NOTE: this draws `x` whole measure columns, so every row receives values
   # from the same measures, and the result is returned in long form (one row
   # per group/category/measure).
   tibble::tribble(
     ~group, ~category, ~measure1, ~measure2, ~measure3, ~measure180,
     "A",    "z",       56,        55,        57,        55,
     "A",    "x",       54,        55,        55,        54,
     "B",    "z",       53,        56,        54,        55,
     "B",    "x",       58,        55,        53,        57
   ) %>%
     # long form: one row per (group, category, measure) value
     pivot_longer(
       starts_with("measure"),
       names_to = "measure",
       values_to = "value"
     ) %>%
     # One row per measure column, its values packed into the `data`
     # list-column. Nesting without group_by() leaves the result ungrouped, so
     # sample_n() draws among the measures instead of within one-row groups
     # (which fails for any size above 1).
     nest(data = -measure) %>%
     sample_n(x) %>%
     unnest(cols = data)

Upvotes: 2

Yifu Yan
Yifu Yan

Reputation: 6116

You can sample by column numbers

# make sample data: two identifier columns followed by ten 0/1 columns
df <-
  cbind(
    data.frame(group = rep(head(letters, 5), 2),
               category = rep(tail(letters, 5), 2)),
    data.frame(matrix(sample(c(0, 1), 100, replace = TRUE), 10, 10))
  )

# the number of columns you want to sample
x <- 2

# Candidate columns are everything except the identifier columns. They are
# located by name so the result does not depend on `x`: (x + 1):ncol(df) only
# skips exactly the two id columns when x happens to be 2.
id_columns <- c("group", "category")
column_numbers <- which(!names(df) %in% id_columns)

# Draw positions with sample.int(): sample(v, x) would treat a length-one
# `v` as the range 1:v instead of sampling from it.
columns_sampled <- column_numbers[sample.int(length(column_numbers), x)]

Result:

> df[,columns_sampled]
   X9 X4
1   1  1
2   1  1
3   0  1
4   0  1
5   0  0
6   1  1
7   1  1
8   1  0
9   0  0
10  1  1

Upvotes: 1

Related Questions