Reputation: 163
I have a table where each row contains 180 observations of the same measure each in their own column, like this:
group category measure1 measure2 measure3 ... measure180
A z 56 55 57 55
A x 54 55 55 54
B z 53 56 54 55
B x 58 55 53 57
and so on
How would I randomly sample x measures from each row, without replacement? The order of the sampled values is irrelevant.
So if x = 2 I would get:
group category sample1 sample2
A z 55 57
A x 55 54
B z 54 56
B x 55 53
Upvotes: 1
Views: 125
Reputation: 887871
We can also do this in the tidyverse, using `pmap` to sample the elements of the 'measure' columns from each row of the dataset:
library(tidyverse)
# Number of values to draw from every row, and the matching output names.
n <- 2
nm1 <- str_c("sample", seq_len(n))

# Receives one row's measure values through `...`, draws `n` of them without
# replacement and returns them as a one-row tibble named sample1..sample<n>.
draw_row <- function(...) {
  picked <- sample(c(...), n, replace = FALSE)
  as_tibble(set_names(as.list(picked), nm1))
}

df %>%
  transmute(
    group,
    category,
    # pmap() calls draw_row() once per row of the selected measure columns
    out = pmap(select(., starts_with("measure")), draw_row)
  ) %>%
  # spread each one-row tibble into regular columns
  unnest(cols = out)
# A tibble: 4 x 4
# group category sample1 sample2
# <fct> <fct> <int> <int>
#1 A z 56 55
#2 A x 54 55
#3 B z 56 54
#4 B x 58 53
# Example data: two factor identifier columns plus four integer measure
# columns (measure1..measure3 and measure180), one row per group/category.
df <- data.frame(
  group = factor(c("A", "A", "B", "B"), levels = c("A", "B")),
  category = factor(c("z", "x", "z", "x"), levels = c("x", "z")),
  measure1 = c(56L, 54L, 53L, 58L),
  measure2 = c(55L, 55L, 56L, 55L),
  measure3 = c(57L, 55L, 54L, 53L),
  measure180 = c(55L, 54L, 55L, 57L)
)
Upvotes: 1
Reputation: 389245
In base R, we can use apply
rowwise to select x
values from each row
# Positions of the measure columns, and the number of values to draw per row.
cols <- grep("^measure", names(df))
x <- 2

# Convert only the measure columns to a matrix so row extraction is explicit.
measures <- as.matrix(df[cols])

# Draw x distinct column positions for every row. Indexing through
# sample.int() avoids sample()'s special case where a single number n is
# treated as 1:n (which would bite if only one measure column existed).
picks <- lapply(seq_len(nrow(measures)), function(i) {
  measures[i, sample.int(ncol(measures), x)]
})

# rbind() always yields an nrow x x matrix, including for x = 1, where
# t(apply(...)) would collapse to a 1 x nrow matrix and break cbind().
sampled <- do.call(rbind, picks)
colnames(sampled) <- paste0("sample", seq_len(x))

cbind(df[-cols], sampled)
#   group category sample1 sample2
# 1     A        z      56      55
# 2     A        x      54      54
# 3     B        z      54      53
# 4     B        x      55      53
Another option using dplyr
and tidyr
library(dplyr)
library(tidyr)
# Sample x measures per row: reshape to long form, draw x rows within each
# original row, label the draws sample1..sample<x>, then reshape back to wide.
df %>%
  # a row id keeps rows apart even if group/category combinations repeat
  mutate(row = row_number()) %>%
  pivot_longer(
    starts_with("measure"),
    names_to = "key",
    values_to = "value"
  ) %>%
  group_by(row) %>%
  sample_n(x) %>%
  # seq_len() instead of 1:x so that x = 0 yields no labels rather than c(1, 0)
  mutate(key = paste0("sample", seq_len(x))) %>%
  ungroup() %>%
  pivot_wider(names_from = key, values_from = value) %>%
  select(-row)
data
# Example data used by the snippets above: factor identifiers `group` and
# `category`, followed by integer measure columns.
group_values <- factor(c("A", "A", "B", "B"), levels = c("A", "B"))
category_values <- factor(c("z", "x", "z", "x"), levels = c("x", "z"))

df <- data.frame(
  group = group_values,
  category = category_values,
  measure1 = c(56L, 54L, 53L, 58L),
  measure2 = c(55L, 55L, 56L, 55L),
  measure3 = c(57L, 55L, 54L, 53L),
  measure180 = c(55L, 54L, 55L, 57L)
)
Upvotes: 1
Reputation: 183
library(tidyverse)
# Number of measures to draw from each row (the question's x).
x <- 2

tibble::tribble(
  ~group, ~category, ~measure1, ~measure2, ~measure3, ~measure180,
  "A", "z", 56, 55, 57, 55,
  "A", "x", 54, 55, 55, 54,
  "B", "z", 53, 56, 54, 55,
  "B", "x", 58, 55, 53, 57
) %>%
  # a row id identifies each original row even if group/category repeat
  mutate(row = row_number()) %>%
  pivot_longer(
    starts_with("measure"),
    names_to = "measure",
    values_to = "value"
  ) %>%
  # Group by original row, not by measure: grouping by measure would pick the
  # same x measure columns for every row instead of sampling within each row.
  group_by(row) %>%
  sample_n(x) %>%
  ungroup() %>%
  select(-row)
Upvotes: 2
Reputation: 6116
You can sample by column numbers
# make sample data: two identifier columns followed by ten 0/1 columns
df <-
  cbind(
    data.frame(group = rep(head(letters, 5), 2),
               category = rep(tail(letters, 5), 2)),
    data.frame(matrix(sample(c(0, 1), 100, replace = TRUE), 10, 10))
  )
# the number of columns you want to sample
x <- 2
# identifier columns that must never be sampled
id_columns <- c("group", "category")
# Candidate columns are everything except the identifiers. This is derived
# from the column names rather than from x, so changing the sample size does
# not change which columns are skipped.
column_numbers <- which(!names(df) %in% id_columns)
# Draw x distinct candidate columns; indexing via sample.int() avoids sample()
# treating a single remaining column number n as the range 1:n.
# Note: one draw is made for the whole table, so every row gets the same
# x columns.
columns_sampled <- column_numbers[sample.int(length(column_numbers), x)]
Result:
> df[,columns_sampled]
X9 X4
1 1 1
2 1 1
3 0 1
4 0 1
5 0 0
6 1 1
7 1 1
8 1 0
9 0 0
10 1 1
Upvotes: 1