Reputation: 1120
In general I have two data frames:
Main one:
structure(list(mpg = c(21, 21, 22.8, 21.4, 18.7, 18.1, 14.3,
24.4, 22.8, 19.2, 17.8, 16.4, 17.3, 15.2, 10.4, 10.4, 14.7, 32.4,
30.4, 33.9, 21.5, 15.5, 15.2, 13.3, 19.2, 27.3, 26, 30.4, 15.8,
19.7, 15, 21.4), cyl = c(6, 6, 4, 6, 8, 6, 8, 4, 4, 6, 6, 8,
8, 8, 8, 8, 8, 4, 4, 4, 4, 8, 8, 8, 8, 4, 4, 4, 8, 6, 8, 4),
disp = c(160, 160, 108, 258, 360, 225, 360, 146.7, 140.8,
167.6, 167.6, 275.8, 275.8, 275.8, 472, 460, 440, 78.7, 75.7,
71.1, 120.1, 318, 304, 350, 400, 79, 120.3, 95.1, 351, 145,
301, 121), hp = c(110, 110, 93, 110, 175, 105, 245, 62, 95,
123, 123, 180, 180, 180, 205, 215, 230, 66, 52, 65, 97, 150,
150, 245, 175, 66, 91, 113, 264, 175, 335, 109), drat = c(3.9,
3.9, 3.85, 3.08, 3.15, 2.76, 3.21, 3.69, 3.92, 3.92, 3.92,
3.07, 3.07, 3.07, 2.93, 3, 3.23, 4.08, 4.93, 4.22, 3.7, 2.76,
3.15, 3.73, 3.08, 4.08, 4.43, 3.77, 4.22, 3.62, 3.54, 4.11
), wt = c(2.62, 2.875, 2.32, 3.215, 3.44, 3.46, 3.57, 3.19,
3.15, 3.44, 3.44, 4.07, 3.73, 3.78, 5.25, 5.424, 5.345, 2.2,
1.615, 1.835, 2.465, 3.52, 3.435, 3.84, 3.845, 1.935, 2.14,
1.513, 3.17, 2.77, 3.57, 2.78), qsec = c(16.46, 17.02, 18.61,
19.44, 17.02, 20.22, 15.84, 20, 22.9, 18.3, 18.9, 17.4, 17.6,
18, 17.98, 17.82, 17.42, 19.47, 18.52, 19.9, 20.01, 16.87,
17.3, 15.41, 17.05, 18.9, 16.7, 16.9, 14.5, 15.5, 14.6, 18.6
), vs = c(0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0,
0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1), am = c(1,
1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1,
0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1), gear = c("a", "b", "c",
"d", "e", "f", "g", "h", "i", "j", "k", "l", "m", "n", "o",
"p", "r", "s", "t", "u", "w", "z", "a1", "b1", "c1", "d1",
"e1", "f1", "g1", "h1", "i1"), carb = c("Mazda RX4", "Mazda RX4 Wag",
"Datsun 710", "Hornet 4 Drive", "Hornet Sportabout", "Valiant",
"Duster 360", "Merc 240D", "Merc 230", "Merc 280", "Merc 280C",
"Merc 450SE", "Merc 450SL", "Merc 450SLC", "Cadillac Fleetwood",
"Lincoln Continental", "Chrysler Imperial", "Fiat 128", "Honda Civic",
"Toyota Corolla", "Toyota Corona", "Dodge Challenger", "AMC Javelin",
"Camaro Z28", "Pontiac Firebird", "Fiat X1-9", "Porsche 914-2",
"Lotus Europa", "Ford Pantera L", "Ferrari Dino", "Maserati Bora",
"Volvo 142E")), .Names = c("mpg", "cyl", "disp", "hp", "drat",
"wt", "qsec", "vs", "am", "gear", "carb"), row.names = c(NA,
-32L), class = "data.frame")
Table with names:
structure(list(car = structure(c(18L, 18L, 19L, 19L, 5L, 13L,
14L, 31L, 7L, 21L, 20L, 22L, 23L, 24L, 25L, 26L, 2L, 15L, 4L,
9L, 12L, 29L, 30L, 6L, 1L, 3L, 27L, 10L, 28L, 16L, 11L, 11L,
8L, 17L, 32L, 32L), .Label = c("AMC Javelin", "Cadillac Fleetwood",
"Camaro Z28", "Chrysler Imperial", "Datsun 710", "Dodge Challenger",
"Duster 360", "Ferrari Dino", "Fiat 128", "Fiat X1-9", "Ford Pantera L",
"Honda Civic", "Hornet 4 Drive", "Hornet Sportabout", "Lincoln Continental",
"Lotus Europa", "Maserati Bora", "Mazda RX4", "Mazda RX4 Wag",
"Merc 230", "Merc 240D", "Merc 280", "Merc 280C", "Merc 450SE",
"Merc 450SL", "Merc 450SLC", "Pontiac Firebird", "Porsche 914-2",
"Toyota Corolla", "Toyota Corona", "Valiant", "Volvo 142E"), class = "factor"),
owner = structure(c(15L, 25L, 9L, 8L, 4L, 7L, 31L, 22L, 17L,
6L, 12L, 32L, 12L, 19L, 1L, 3L, 11L, 20L, 26L, 27L, 24L,
29L, 16L, 1L, 2L, 13L, 9L, 26L, 23L, 10L, 18L, 14L, 30L,
28L, 5L, 21L), .Label = c("Amid", "Armin", "Crane", "Dietmar",
"Gared", "Gratea", "Hank", "Hannea", "Hans", "Heta", "Horse_with_no_name",
"Jeff", "Krea", "Marea", "Mark", "Mattheus", "Micha", "Miko",
"Myrcella", "Neil", "Nina", "Peter", "Rene", "Robert", "Steffan",
"Tim", "Timon", "Timothy", "Uwe", "Vincent", "Wolfram", "Yena"
), class = "factor")), .Names = c("car", "owner"), row.names = c(NA,
-36L), class = "data.frame")
So I would like to add additional column to my main data frame with the name of the owner
. The owners
of each of the car you can find in the second data.
BUT:
As you already noticed some of the cars might have different owners. I don't want to create additional rows in my main data frame so I would like to put two/three or more owners in the same row but separated by coma for example.
Upvotes: 1
Views: 87
Reputation: 3121
I'd make your second dataframe a data table (which I call dt2) and then you can do:
dt2 <- dt2[ , list( 'owner' = toString(owner)) , by = car ]
And then just join that onto your first data table.
Edit: I do like the addition of "toString" by @akrun
Upvotes: 3
Reputation: 887501
Try
library(data.table)
res <- setDT(df1, key='car')[df2][,c(.SD[1L],
list(ownerN=toString(owner))), car][,owner:=NULL]
head(res,2)
# car mpg cyl disp hp drat wt qsec vs am gear carb
#1: Mazda RX4 21 6 160 110 3.9 2.620 16.46 0 1 4 4
#2: Mazda RX4 Wag 21 6 160 110 3.9 2.875 17.02 0 1 4 4
# ownerN
#1: Mark, Steffan
#2: Hans, Hannea
Or alternatively paste
the 'owner' by 'car' in 'df2' and join with the 'df1'
dt2 <- setDT(df2)[, .(owner=toString(owner)), by= car]
setkey(setDT(df1), car)[dt2]
df1 <- cbind(mtcars, car=row.names(mtcars))
row.names(df1) <- NULL
df2[] <- lapply(df2, as.character)
Upvotes: 3
Reputation: 7840
I don't know why you would do that but if you want to regroup owners in the same row and separate them by a comma :
library(dplyr)
df2 %>%
group_by(car) %>%
summarise(owner = paste(owner, collapse = ", ")) %>%
inner_join(df1, by = c("car" = "carb"))
# Which gives for example :
car owner mpg cyl disp hp drat wt qsec vs am gear
1 Mazda RX4 Mark, Steffan 21 6 160 110 3.9 2.62 16.46 0 1 4
Upvotes: 2