Meso
Meso

Reputation: 1435

Add a group of rows to a data frame a number of times

I have a data frame with grouped rows and columns and want to add the group of rows as a whole a number of times based on information contained in another data frame. In the example data I have shown the final output I wish to have by duplicating rows with id 1 (January 1, 1990) three times. variable "dups" indicates how many times the group should be duplicated.

Here is a sample dput:

> dput(df)
structure(list(date = c("01-jan-90", "01-jan-90", "01-jan-90", 
"01-jan-90", "01-jan-90", "02-jan-90", "02-jan-90", "02-jan-90", 
"02-jan-90", "02-jan-90"), rdate = c("08-jan-90", "15-jan-90", 
"01-jan-90", "22-jan-90", "29-jan-90", "09-jan-90", "16-jan-90", 
"02-jan-90", "23-jan-90", "30-jan-90"), id = c(1L, 1L, 1L, 1L, 
1L, 2L, 2L, 2L, 2L, 2L)), datalabel = "", time.stamp = "30 Dec 2013 16:39", .Names = c("date", 
"rdate", "id"), formats = c("%9s", "%9s", "%8.0g"), types = c(9L, 
9L, 251L), val.labels = c("", "", ""), var.labels = c("", "", 
""), row.names = c("1", "2", "3", "4", "5", "6", "7", "8", "9", 
"10"), version = 12L, class = "data.frame")

And information on date and the number of times to duplicate is found below:

date    dups    id
01-jan-90   3   1
02-jan-90   2   2

Output for ID 1 on January 1, 1990 looks like this:

      date     rdate id newid
1  01-jan-90 08-jan-90  1     1
2  01-jan-90 15-jan-90  1     1
3  01-jan-90 01-jan-90  1     1
4  01-jan-90 22-jan-90  1     1
5  01-jan-90 29-jan-90  1     1
6  01-jan-90 08-jan-90  1     2
7  01-jan-90 15-jan-90  1     2
8  01-jan-90 01-jan-90  1     2
9  01-jan-90 22-jan-90  1     2
10 01-jan-90 29-jan-90  1     2
11 01-jan-90 08-jan-90  1     3
12 01-jan-90 15-jan-90  1     3
13 01-jan-90 01-jan-90  1     3
14 01-jan-90 22-jan-90  1     3
15 01-jan-90 29-jan-90  1     3

Upvotes: 0

Views: 195

Answers (3)

A5C1D2H2I1M1N2O1R2T1
A5C1D2H2I1M1N2O1R2T1

Reputation: 193497

Here's a fairly straightforward approach using merge:

First, we'll assume your "duplicator" data.frame is called "mydf". Second, merge, create a sequence of your dups column, "expand" your rows, and bind your new ID back in.

A <- merge(df, mydf)
newID <- sequence(A$dups)
out <- cbind(A[rep(rownames(A), A$dups), c("date", "id", "rdate")], newID)
head(out)
#          date id     rdate newID
# 1   01-jan-90  1 08-jan-90     1
# 1.1 01-jan-90  1 08-jan-90     2
# 1.2 01-jan-90  1 08-jan-90     3
# 2   01-jan-90  1 15-jan-90     1
# 2.1 01-jan-90  1 15-jan-90     2
# 2.2 01-jan-90  1 15-jan-90     3

The order is not the same as your sample output, but that's pretty easily fixed by using order, as follows:

outordered <- out[with(out, do.call(order, list(date, id, newID))), ]
head(outordered, 10)
#          date id     rdate newID
# 1   01-jan-90  1 08-jan-90     1
# 2   01-jan-90  1 15-jan-90     1
# 3   01-jan-90  1 01-jan-90     1
# 4   01-jan-90  1 22-jan-90     1
# 5   01-jan-90  1 29-jan-90     1
# 1.1 01-jan-90  1 08-jan-90     2
# 2.1 01-jan-90  1 15-jan-90     2
# 3.1 01-jan-90  1 01-jan-90     2
# 4.1 01-jan-90  1 22-jan-90     2
# 5.1 01-jan-90  1 29-jan-90     2
tail(outordered)
#           date id     rdate newID
# 10   02-jan-90  2 30-jan-90     1
# 6.1  02-jan-90  2 09-jan-90     2
# 7.1  02-jan-90  2 16-jan-90     2
# 8.1  02-jan-90  2 02-jan-90     2
# 9.1  02-jan-90  2 23-jan-90     2
# 10.1 02-jan-90  2 30-jan-90     2

Upvotes: 1

BrodieG
BrodieG

Reputation: 52637

# Create the data.frame with the number of duplications
# assuming it is unique on date/id

id <- unique(df[, c("date", "id")])
id$dups <- c(3, 2)
library(plyr)

# Use ddply to cycle through the id df, and lapply to
# duplicate the df

ddply(id, c("date", "id"), 
  function(df.sub) {
    if(nrow(df.sub) != 1L) stop("Logic error") # id df shoudl be unique on date/id
    df.to.rep <- subset(df, date==df.sub$date & id==df.sub$id)
    df.rep <- lapply(1:df.sub$dups, function(newid) transform(df.to.rep, newid=newid))
    do.call(rbind, df.rep)
  }
)

#         date     rdate id newid
# 1  01-jan-90 08-jan-90  1     1
# 2  01-jan-90 15-jan-90  1     1
# 3  01-jan-90 01-jan-90  1     1
# 4  01-jan-90 22-jan-90  1     1
# 5  01-jan-90 29-jan-90  1     1
# 6  01-jan-90 08-jan-90  1     2
# 7  01-jan-90 15-jan-90  1     2
# 8  01-jan-90 01-jan-90  1     2
# 9  01-jan-90 22-jan-90  1     2
# 10 01-jan-90 29-jan-90  1     2
# 11 01-jan-90 08-jan-90  1     3
# 12 01-jan-90 15-jan-90  1     3
# 13 01-jan-90 01-jan-90  1     3
# 14 01-jan-90 22-jan-90  1     3
# 15 01-jan-90 29-jan-90  1     3
# 16 02-jan-90 09-jan-90  2     1
# 17 02-jan-90 16-jan-90  2     1
# 18 02-jan-90 02-jan-90  2     1
# 19 02-jan-90 23-jan-90  2     1
# 20 02-jan-90 30-jan-90  2     1
# 21 02-jan-90 09-jan-90  2     2
# 22 02-jan-90 16-jan-90  2     2
# 23 02-jan-90 02-jan-90  2     2
# 24 02-jan-90 23-jan-90  2     2
# 25 02-jan-90 30-jan-90  2     2

Upvotes: 1

lukeA
lukeA

Reputation: 54237

df2 <- read.table(textConnection("date dups id
01-jan-90 3 1
02-jan-90 2 2"), sep=" ", header=T)

dup <- function(df, date, num) {
  i <- which(df$date==date)
  cbind(df[rep(i,num),], newid=rep(1:num, each=length(i)))
}

df3 <- data.frame()
for (x in 1:nrow(df2)) {
  df3 <- rbind(df3, dup(df, df2$date[x], df2$dups[x]))
}
df3
#           date     rdate id newid
# 1    01-jan-90 08-jan-90  1     1
# 2    01-jan-90 15-jan-90  1     1
# 3    01-jan-90 01-jan-90  1     1
# 4    01-jan-90 22-jan-90  1     1
# 5    01-jan-90 29-jan-90  1     1
# 1.1  01-jan-90 08-jan-90  1     2
# 2.1  01-jan-90 15-jan-90  1     2
# 3.1  01-jan-90 01-jan-90  1     2
# 4.1  01-jan-90 22-jan-90  1     2
# 5.1  01-jan-90 29-jan-90  1     2
# 1.2  01-jan-90 08-jan-90  1     3
# 2.2  01-jan-90 15-jan-90  1     3
# 3.2  01-jan-90 01-jan-90  1     3
# 4.2  01-jan-90 22-jan-90  1     3
# 5.2  01-jan-90 29-jan-90  1     3
# 6    02-jan-90 09-jan-90  2     1
# 7    02-jan-90 16-jan-90  2     1
# 8    02-jan-90 02-jan-90  2     1
# 9    02-jan-90 23-jan-90  2     1
# 10   02-jan-90 30-jan-90  2     1
# 6.1  02-jan-90 09-jan-90  2     2
# 7.1  02-jan-90 16-jan-90  2     2
# 8.1  02-jan-90 02-jan-90  2     2
# 9.1  02-jan-90 23-jan-90  2     2
# 10.1 02-jan-90 30-jan-90  2     2

Upvotes: 1

Related Questions