Reputation: 464
I need to merge three separate DFs ("factors_sed", "resp", and "npoc_sed") based on the shared column "Samples". Each DF contains a different number of rows (some analyses had several replicates), but the total number I need is of the first DF, "factors_sed".
So far I have my first two dfs, "factors_sed" and "resp" (dput()
below). The 216 obs in "factors_sed" represent replicates. I would like to duplicate the columns in "resp" to match "factors_sed".
library(dplyr)
factors_try <- merge(factors_sed, resp, by="Samples", all=T)
but this gives me a df with 524 observations.
My second issue will be to merge a third DF "npoc_sed" (288 obs) to the new, correctly merged df made from "factors_sed" and "resp" by dropping duplicates. Essentially the opposite of the first question.
I know it will require the use of dplyr
's different joins, but honestly I'm pretty confused right now. I appreciate any help!
> dput(factors_sed)
structure(list(SampleID = c("Sample_S19S_0006_Sed_Field_ICR.D_P2",
"Sample_S19S_0006_Sed_Field_ICR.M_P2", "Sample_S19S_0006_Sed_Field_ICR.U_P2",
"Sample_S19S_0009_Sed_Field_ICR.M_P2", "Sample_S19S_0009_Sed_Field_ICR.U_P2",
"Sample_S19S_0011_Sed_Field_ICR.D_P2", "Sample_S19S_0011_Sed_Field_ICR.M_P2",
"Sample_S19S_0011_Sed_Field_ICR.U_P2", "Sample_S19S_0012_Sed_Field_ICR.D_P2",
"Sample_S19S_0012_Sed_Field_ICR.M_P2", "Sample_S19S_0012_Sed_Field_ICR.U_P2",
"Sample_S19S_0013_Sed_Field_ICR.D_P2", "Sample_S19S_0013_Sed_Field_ICR.M_P2",
"Sample_S19S_0013_Sed_Field_ICR.U_P2", "Sample_S19S_0014_Sed_Field_ICR.D_P2",
"Sample_S19S_0014_Sed_Field_ICR.M_P2", "Sample_S19S_0014_Sed_Field_ICR.U_P2",
"Sample_S19S_0015_Sed_Field_ICR.D_P2", "Sample_S19S_0015_Sed_Field_ICR.M_P2",
"Sample_S19S_0015_Sed_Field_ICR.U_P2", "Sample_S19S_0016_Sed_Field_ICR.D_P2",
"Sample_S19S_0016_Sed_Field_ICR.M_P2", "Sample_S19S_0016_Sed_Field_ICR.U_P1",
"Sample_S19S_0017_Sed_Field_ICR.D_P2", "Sample_S19S_0017_Sed_Field_ICR.M_P2",
"Sample_S19S_0017_Sed_Field_ICR.U_P2", "Sample_S19S_0018_Sed_Field_ICR.D_P1",
"Sample_S19S_0018_Sed_Field_ICR.M_P2", "Sample_S19S_0018_Sed_Field_ICR.U_P2",
"Sample_S19S_0019_Sed_Field_ICR.D_P2", "Sample_S19S_0019_Sed_Field_ICR.M_P2",
"Sample_S19S_0019_Sed_Field_ICR.U_P2", "Sample_S19S_0020_Sed_Field_ICR.D_P2",
"Sample_S19S_0020_Sed_Field_ICR.M_P2", "Sample_S19S_0020_Sed_Field_ICR.U_P2",
"Sample_S19S_0021_Sed_Field_ICR.D_P2", "Sample_S19S_0021_Sed_Field_ICR.M_P2",
"Sample_S19S_0021_Sed_Field_ICR.U_P2", "Sample_S19S_0022_Sed_Field_ICR.D_P2",
"Sample_S19S_0022_Sed_Field_ICR.M_P2", "Sample_S19S_0022_Sed_Field_ICR.U_P2",
"Sample_S19S_0023_Sed_Field_ICR.D_P2", "Sample_S19S_0023_Sed_Field_ICR.M_P2",
"Sample_S19S_0023_Sed_Field_ICR.U_P2", "Sample_S19S_0024_Sed_Field_ICR.M_P2",
"Sample_S19S_0025_Sed_Field_ICR.D_P2", "Sample_S19S_0025_Sed_Field_ICR.M_P1",
"Sample_S19S_0025_Sed_Field_ICR.U_P2", "Sample_S19S_0026_Sed_Field_ICR.M_P2",
"Sample_S19S_0026_Sed_Field_ICR.U_P2", "Sample_S19S_0027_Sed_Field_ICR.D_P2",
"Sample_S19S_0027_Sed_Field_ICR.M_P2", "Sample_S19S_0027_Sed_Field_ICR.U_P2",
"Sample_S19S_0028_Sed_Field_ICR.D_P2", "Sample_S19S_0028_Sed_Field_ICR.M_P1",
"Sample_S19S_0028_Sed_Field_ICR.U_P1", "Sample_S19S_0029_Sed_Field_ICR.D_P2",
"Sample_S19S_0029_Sed_Field_ICR.M_P2", "Sample_S19S_0029_Sed_Field_ICR.U_P2",
"Sample_S19S_0030_Sed_Field_ICR.U_P2", "Sample_S19S_0031_Sed_Field_ICR.D_P2",
"Sample_S19S_0031_Sed_Field_ICR.M_P2", "Sample_S19S_0031_Sed_Field_ICR.U_P2",
"Sample_S19S_0032_Sed_Field_ICR.D_P2", "Sample_S19S_0032_Sed_Field_ICR.M_P2",
"Sample_S19S_0032_Sed_Field_ICR.U_P2", "Sample_S19S_0034_Sed_Field_ICR.D_P2",
"Sample_S19S_0034_Sed_Field_ICR.M_P2", "Sample_S19S_0034_Sed_Field_ICR.U_P2",
"Sample_S19S_0035_Sed_Field_ICR.D_P2", "Sample_S19S_0035_Sed_Field_ICR.M_P1",
"Sample_S19S_0035_Sed_Field_ICR.U_P2", "Sample_S19S_0036_Sed_Field_ICR.D_P1",
"Sample_S19S_0036_Sed_Field_ICR.M_P2", "Sample_S19S_0036_Sed_Field_ICR.U_P2",
"Sample_S19S_0037_Sed_Field_ICR.U_P2", "Sample_S19S_0038_Sed_Field_ICR.D_P2",
"Sample_S19S_0038_Sed_Field_ICR.M_P2", "Sample_S19S_0039_Sed_Field_ICR.D_P2",
"Sample_S19S_0039_Sed_Field_ICR.M_P1", "Sample_S19S_0039_Sed_Field_ICR.U_P2",
"Sample_S19S_0040_Sed_Field_ICR.U_P2", "Sample_S19S_0041_Sed_Field_ICR.D_P2",
"Sample_S19S_0041_Sed_Field_ICR.M_P2", "Sample_S19S_0041_Sed_Field_ICR.U_P2",
"Sample_S19S_0042_Sed_Field_ICR.D_P2", "Sample_S19S_0042_Sed_Field_ICR.M_P2",
"Sample_S19S_0042_Sed_Field_ICR.U_P2", "Sample_S19S_0043_Sed_Field_ICR.D_P2",
"Sample_S19S_0043_Sed_Field_ICR.M_P2", "Sample_S19S_0043_Sed_Field_ICR.U_P2",
"Sample_S19S_0044_Sed_Field_ICR.D_P2", "Sample_S19S_0044_Sed_Field_ICR.U_P2",
"Sample_S19S_0046_Sed_Field_ICR.D_P1", "Sample_S19S_0046_Sed_Field_ICR.M_P1",
"Sample_S19S_0046_Sed_Field_ICR.U_P1", "Sample_S19S_0049_Sed_Field_ICR.D_P2",
"Sample_S19S_0049_Sed_Field_ICR.M_P2", "Sample_S19S_0049_Sed_Field_ICR.U_P2",
"Sample_S19S_0052_Sed_Field_ICR.D_P2", "Sample_S19S_0052_Sed_Field_ICR.M_P2",
"Sample_S19S_0052_Sed_Field_ICR.U_P2", "Sample_S19S_0053_Sed_Field_ICR.D_P2",
"Sample_S19S_0053_Sed_Field_ICR.M_P2", "Sample_S19S_0053_Sed_Field_ICR.U_P2",
"Sample_S19S_0054_Sed_Field_ICR.D_P2", "Sample_S19S_0054_Sed_Field_ICR.M_P2",
"Sample_S19S_0054_Sed_Field_ICR.U_P2", "Sample_S19S_0055_Sed_Field_ICR.D_P2",
"Sample_S19S_0055_Sed_Field_ICR.M_P2", "Sample_S19S_0055_Sed_Field_ICR.U_P2",
"Sample_S19S_0056_Sed_Field_ICR.D_P2", "Sample_S19S_0056_Sed_Field_ICR.M_P2",
"Sample_S19S_0056_Sed_Field_ICR.U_P2", "Sample_S19S_0057_Sed_Field_ICR.D_P2",
"Sample_S19S_0057_Sed_Field_ICR.M_P2", "Sample_S19S_0057_Sed_Field_ICR.U_P2",
"Sample_S19S_0058_Sed_Field_ICR.D_P2", "Sample_S19S_0058_Sed_Field_ICR.U_P2",
"Sample_S19S_0060_Sed_Field_ICR.D_P2", "Sample_S19S_0060_Sed_Field_ICR.M_P2",
"Sample_S19S_0060_Sed_Field_ICR.U_P2", "Sample_S19S_0061_Sed_Field_ICR.D_P2",
"Sample_S19S_0061_Sed_Field_ICR.M_P2", "Sample_S19S_0063_Sed_Field_ICR.M_P2",
"Sample_S19S_0063_Sed_Field_ICR.U_P2", "Sample_S19S_0064_Sed_Field_ICR.D_P2",
"Sample_S19S_0064_Sed_Field_ICR.M_P2", "Sample_S19S_0065_Sed_Field_ICR.D_P2",
"Sample_S19S_0065_Sed_Field_ICR.M_P2", "Sample_S19S_0065_Sed_Field_ICR.U_P2",
"Sample_S19S_0066_Sed_Field_ICR.D_P2", "Sample_S19S_0066_Sed_Field_ICR.M_P2",
"Sample_S19S_0066_Sed_Field_ICR.U_P2", "Sample_S19S_0067_Sed_Field_ICR.D_P2",
"Sample_S19S_0067_Sed_Field_ICR.M_P2", "Sample_S19S_0067_Sed_Field_ICR.U_P2",
"Sample_S19S_0068_Sed_Field_ICR.M_P2", "Sample_S19S_0068_Sed_Field_ICR.U_P2",
"Sample_S19S_0069_Sed_Field_ICR.D_P2", "Sample_S19S_0069_Sed_Field_ICR.M_P2",
"Sample_S19S_0069_Sed_Field_ICR.U_P2", "Sample_S19S_0070_Sed_Field_ICR.D_P2",
"Sample_S19S_0070_Sed_Field_ICR.M_P2", "Sample_S19S_0070_Sed_Field_ICR.U_P2",
"Sample_S19S_0071_Sed_Field_ICR.D_P2", "Sample_S19S_0071_Sed_Field_ICR.M_P2",
"Sample_S19S_0071_Sed_Field_ICR.U_P2", "Sample_S19S_0072_Sed_Field_ICR.D_P2",
"Sample_S19S_0072_Sed_Field_ICR.M_P2", "Sample_S19S_0072_Sed_Field_ICR.U_P2",
"Sample_S19S_0073_Sed_Field_ICR.D_P2", "Sample_S19S_0073_Sed_Field_ICR.M_P2",
"Sample_S19S_0073_Sed_Field_ICR.U_P2", "Sample_S19S_0074_Sed_Field_ICR.D_P2",
"Sample_S19S_0074_Sed_Field_ICR.M_P2", "Sample_S19S_0074_Sed_Field_ICR.U_P2",
"Sample_S19S_0075_Sed_Field_ICR.D_P2", "Sample_S19S_0075_Sed_Field_ICR.M_P2",
"Sample_S19S_0075_Sed_Field_ICR.U_P2", "Sample_S19S_0076_Sed_Field_ICR.D_P2",
"Sample_S19S_0076_Sed_Field_ICR.M_P1", "Sample_S19S_0076_Sed_Field_ICR.U_P1",
"Sample_S19S_0077_Sed_Field_ICR.D_P2", "Sample_S19S_0077_Sed_Field_ICR.M_P2",
"Sample_S19S_0077_Sed_Field_ICR.U_P2", "Sample_S19S_0078_Sed_Field_ICR.D_P2",
"Sample_S19S_0078_Sed_Field_ICR.M_P2", "Sample_S19S_0078_Sed_Field_ICR.U_P2",
"Sample_S19S_0079_Sed_Field_ICR.D_P2", "Sample_S19S_0079_Sed_Field_ICR.M_P2",
"Sample_S19S_0079_Sed_Field_ICR.U_P2", "Sample_S19S_0080_Sed_Field_ICR.D_P2",
"Sample_S19S_0080_Sed_Field_ICR.M_P2", "Sample_S19S_0080_Sed_Field_ICR.U_P2",
"Sample_S19S_0081_Sed_Field_ICR.M_P2", "Sample_S19S_0081_Sed_Field_ICR.U_P2",
"Sample_S19S_0082_Sed_Field_ICR.D_P2", "Sample_S19S_0082_Sed_Field_ICR.M_P2",
"Sample_S19S_0082_Sed_Field_ICR.U_P2", "Sample_S19S_0083_Sed_Field_ICR.D_P2",
"Sample_S19S_0083_Sed_Field_ICR.M_P2", "Sample_S19S_0083_Sed_Field_ICR.U_P2",
"Sample_S19S_0084_Sed_Field_ICR.D_P2", "Sample_S19S_0084_Sed_Field_ICR.M_P2",
"Sample_S19S_0084_Sed_Field_ICR.U_P2", "Sample_S19S_0086_Sed_Field_ICR.D_P2",
"Sample_S19S_0086_Sed_Field_ICR.M_P2", "Sample_S19S_0086_Sed_Field_ICR.U_P2",
"Sample_S19S_0089_Sed_Field_ICR.D_P2", "Sample_S19S_0089_Sed_Field_ICR.M_P2",
"Sample_S19S_0089_Sed_Field_ICR.U_P2", "Sample_S19S_0090_Sed_Field_ICR.D_P2",
"Sample_S19S_0090_Sed_Field_ICR.M_P2", "Sample_S19S_0090_Sed_Field_ICR.U_P2",
"Sample_S19S_0091_Sed_Field_ICR.D_P2", "Sample_S19S_0091_Sed_Field_ICR.M_P2",
"Sample_S19S_0091_Sed_Field_ICR.U_P2", "Sample_S19S_0092_Sed_Field_ICR.D_P2",
"Sample_S19S_0092_Sed_Field_ICR.M_P1", "Sample_S19S_0092_Sed_Field_ICR.U_P2",
"Sample_S19S_0096_Sed_Field_ICR.M_P2", "Sample_S19S_0097_Sed_Field_ICR.D_P2",
"Sample_S19S_0097_Sed_Field_ICR.U_P2", "Sample_S19S_0098_Sed_Field_ICR.D_P2",
"Sample_S19S_0098_Sed_Field_ICR.M_P2", "Sample_S19S_0098_Sed_Field_ICR.U_P2",
"Sample_S19S_0099_Sed_Field_ICR.D_P2", "Sample_S19S_0099_Sed_Field_ICR.M_P2",
"Sample_S19S_0099_Sed_Field_ICR.U_P2", "Sample_S19S_0100_Sed_Field_ICR.D_P2",
"Sample_S19S_0100_Sed_Field_ICR.M_P2", "Sample_S19S_0100_Sed_Field_ICR.U_P2",
"Sample_S19S_0001_Sed_Field_ICR.D_P2", "Sample_S19S_0001_Sed_Field_ICR.M_P2",
"Sample_S19S_0001_Sed_Field_ICR.U_P2"), Type = c("Sediment",
"Sediment", "Sediment", "Sediment", "Sediment", "Sediment", "Sediment",
"Sediment", "Sediment", "Sediment", "Sediment", "Sediment", "Sediment",
"Sediment", "Sediment", "Sediment", "Sediment", "Sediment", "Sediment",
"Sediment", "Sediment", "Sediment", "Sediment", "Sediment", "Sediment",
"Sediment", "Sediment", "Sediment", "Sediment", "Sediment", "Sediment",
"Sediment", "Sediment", "Sediment", "Sediment", "Sediment", "Sediment",
"Sediment", "Sediment", "Sediment", "Sediment", "Sediment", "Sediment",
"Sediment", "Sediment", "Sediment", "Sediment", "Sediment", "Sediment",
"Sediment", "Sediment", "Sediment", "Sediment", "Sediment", "Sediment",
"Sediment", "Sediment", "Sediment", "Sediment", "Sediment", "Sediment",
"Sediment", "Sediment", "Sediment", "Sediment", "Sediment", "Sediment",
"Sediment", "Sediment", "Sediment", "Sediment", "Sediment", "Sediment",
"Sediment", "Sediment", "Sediment", "Sediment", "Sediment", "Sediment",
"Sediment", "Sediment", "Sediment", "Sediment", "Sediment", "Sediment",
"Sediment", "Sediment", "Sediment", "Sediment", "Sediment", "Sediment",
"Sediment", "Sediment", "Sediment", "Sediment", "Sediment", "Sediment",
"Sediment", "Sediment", "Sediment", "Sediment", "Sediment", "Sediment",
"Sediment", "Sediment", "Sediment", "Sediment", "Sediment", "Sediment",
"Sediment", "Sediment", "Sediment", "Sediment", "Sediment", "Sediment",
"Sediment", "Sediment", "Sediment", "Sediment", "Sediment", "Sediment",
"Sediment", "Sediment", "Sediment", "Sediment", "Sediment", "Sediment",
"Sediment", "Sediment", "Sediment", "Sediment", "Sediment", "Sediment",
"Sediment", "Sediment", "Sediment", "Sediment", "Sediment", "Sediment",
"Sediment", "Sediment", "Sediment", "Sediment", "Sediment", "Sediment",
"Sediment", "Sediment", "Sediment", "Sediment", "Sediment", "Sediment",
"Sediment", "Sediment", "Sediment", "Sediment", "Sediment", "Sediment",
"Sediment", "Sediment", "Sediment", "Sediment", "Sediment", "Sediment",
"Sediment", "Sediment", "Sediment", "Sediment", "Sediment", "Sediment",
"Sediment", "Sediment", "Sediment", "Sediment", "Sediment", "Sediment",
"Sediment", "Sediment", "Sediment", "Sediment", "Sediment", "Sediment",
"Sediment", "Sediment", "Sediment", "Sediment", "Sediment", "Sediment",
"Sediment", "Sediment", "Sediment", "Sediment", "Sediment", "Sediment",
"Sediment", "Sediment", "Sediment", "Sediment", "Sediment", "Sediment",
"Sediment", "Sediment", "Sediment", "Sediment", "Sediment", "Sediment",
"Sediment", "Sediment", "Sediment", "Sediment", "Sediment", "Sediment",
"Sediment", "Sediment", "Sediment", "Sediment", "Sediment"),
Samples = c("S19S_0006", "S19S_0006", "S19S_0006", "S19S_0009",
"S19S_0009", "S19S_0011", "S19S_0011", "S19S_0011", "S19S_0012",
"S19S_0012", "S19S_0012", "S19S_0013", "S19S_0013", "S19S_0013",
"S19S_0014", "S19S_0014", "S19S_0014", "S19S_0015", "S19S_0015",
"S19S_0015", "S19S_0016", "S19S_0016", "S19S_0016", "S19S_0017",
"S19S_0017", "S19S_0017", "S19S_0018", "S19S_0018", "S19S_0018",
"S19S_0019", "S19S_0019", "S19S_0019", "S19S_0020", "S19S_0020",
"S19S_0020", "S19S_0021", "S19S_0021", "S19S_0021", "S19S_0022",
"S19S_0022", "S19S_0022", "S19S_0023", "S19S_0023", "S19S_0023",
"S19S_0024", "S19S_0025", "S19S_0025", "S19S_0025", "S19S_0026",
"S19S_0026", "S19S_0027", "S19S_0027", "S19S_0027", "S19S_0028",
"S19S_0028", "S19S_0028", "S19S_0029", "S19S_0029", "S19S_0029",
"S19S_0030", "S19S_0031", "S19S_0031", "S19S_0031", "S19S_0032",
"S19S_0032", "S19S_0032", "S19S_0034", "S19S_0034", "S19S_0034",
"S19S_0035", "S19S_0035", "S19S_0035", "S19S_0036", "S19S_0036",
"S19S_0036", "S19S_0037", "S19S_0038", "S19S_0038", "S19S_0039",
"S19S_0039", "S19S_0039", "S19S_0040", "S19S_0041", "S19S_0041",
"S19S_0041", "S19S_0042", "S19S_0042", "S19S_0042", "S19S_0043",
"S19S_0043", "S19S_0043", "S19S_0044", "S19S_0044", "S19S_0046",
"S19S_0046", "S19S_0046", "S19S_0049", "S19S_0049", "S19S_0049",
"S19S_0052", "S19S_0052", "S19S_0052", "S19S_0053", "S19S_0053",
"S19S_0053", "S19S_0054", "S19S_0054", "S19S_0054", "S19S_0055",
"S19S_0055", "S19S_0055", "S19S_0056", "S19S_0056", "S19S_0056",
"S19S_0057", "S19S_0057", "S19S_0057", "S19S_0058", "S19S_0058",
"S19S_0060", "S19S_0060", "S19S_0060", "S19S_0061", "S19S_0061",
"S19S_0063", "S19S_0063", "S19S_0064", "S19S_0064", "S19S_0065",
"S19S_0065", "S19S_0065", "S19S_0066", "S19S_0066", "S19S_0066",
"S19S_0067", "S19S_0067", "S19S_0067", "S19S_0068", "S19S_0068",
"S19S_0069", "S19S_0069", "S19S_0069", "S19S_0070", "S19S_0070",
"S19S_0070", "S19S_0071", "S19S_0071", "S19S_0071", "S19S_0072",
"S19S_0072", "S19S_0072", "S19S_0073", "S19S_0073", "S19S_0073",
"S19S_0074", "S19S_0074", "S19S_0074", "S19S_0075", "S19S_0075",
"S19S_0075", "S19S_0076", "S19S_0076", "S19S_0076", "S19S_0077",
"S19S_0077", "S19S_0077", "S19S_0078", "S19S_0078", "S19S_0078",
"S19S_0079", "S19S_0079", "S19S_0079", "S19S_0080", "S19S_0080",
"S19S_0080", "S19S_0081", "S19S_0081", "S19S_0082", "S19S_0082",
"S19S_0082", "S19S_0083", "S19S_0083", "S19S_0083", "S19S_0084",
"S19S_0084", "S19S_0084", "S19S_0086", "S19S_0086", "S19S_0086",
"S19S_0089", "S19S_0089", "S19S_0089", "S19S_0090", "S19S_0090",
"S19S_0090", "S19S_0091", "S19S_0091", "S19S_0091", "S19S_0092",
"S19S_0092", "S19S_0092", "S19S_0096", "S19S_0097", "S19S_0097",
"S19S_0098", "S19S_0098", "S19S_0098", "S19S_0099", "S19S_0099",
"S19S_0099", "S19S_0100", "S19S_0100", "S19S_0100", "S19S_0001",
"S19S_0001", "S19S_0001")), row.names = c(NA, -216L), class = "data.frame")
> dput(resp)
structure(list(rate_mg_per_L_per_h = c(15.22, 5.26, 4.91, 7.32,
7.04, 1.43, 6.28, 0.83, 13.44, 3.18, 6.51, 4.16, 2.89, 3.75,
4.03, 10.92, 0.12, 26.57, 7.68, 1.85, 1.95, 5.29, 3.58, 2.87,
0.39, 14.6, 1.23, 8.76, 3.7, 5.75, 4.55, 3.3, 32.74, 15.66, 16.44,
1.93, 0.67, 5.16, 5.06, 10.88, 6.92, 1.12, 0.1, 0.52, 7.67, 0.85,
0.43, 1.04, 5.27, 6.59, 1.66, 0.42, 7.06, 1.6, 11.16, 6.42, 27.96,
23.77, 0.75, 2.79, 16.92, 10.94, 16.51, 1.5, 9.37, 1.36, 1.05,
3.41, 11.22, 0.99, 2.67, 1.92, 0.46, 0.96, 0.44, 11.47, 7.3,
0.28, 7.24, 19.89, 1.07, 0.84, 10.69, 39.84, 0.33, 1.14, 0.55,
2.53, 2.5, 1.55, 4.51, 1.12, 3.7, 20.8, 28.35, 9.88, 10.08, 2.54,
16.33, 16.51, 13.52, 15, 1.84, 6.32, 0.64, 21.19, 0.32, 0.53,
25.2, 1.1, 24.44, 1, 0.24, 3.91, 1.23, 6.42, 9.65, 7.29, 1.33,
2.35, 0.93, 11.56, 1.19, 8.77, 5.3, 2.77, 3.32, 2.39, 12, 0.55,
12.08, 81.75, 0.83, 4.22, 2.82, 15.05, 1.47, 1.43, 1.48, 0.25,
11.32, 14.86, 6.4, 0.41, 3.15, 26.7, 0.59, 4.62, 0.67, 37.92,
3.81, 13.76, 2.37, 2.24, 41.85, 9.16, 1.5, 6.27, 6.92, 11.03,
0.88, 0.94, 4.71, 6.05, 5.24, 0.1, 0.39, 22.97, 0.83, 0.3, 0.47,
14.41, 1.01, 25.45, 1.7, 1.33, 5.04, 1.68, 1.02, 11.47, 4.75,
1.07, 1.34, 4.63, 13.31, 18.48, 7.56, 1.75), Samples = c("S19S_0006",
"S19S_0007", "S19S_0008", "S19S_0009", "S19S_0010", "S19S_0100",
"S19S_0011", "S19S_0012", "S19S_0014", "S19S_0015", "S19S_0016",
"S19S_0017", "S19S_0018", "S19S_0019", "S19S_0020", "S19S_0021",
"S19S_0022", "S19S_0023", "S19S_0025", "S19S_0026", "S19S_0027",
"S19S_0028", "S19S_0029", "S19S_0030", "S19S_0031", "S19S_0032",
"S19S_0033", "S19S_0034", "S19S_0035", "S19S_0036", "S19S_0037",
"S19S_0038", "S19S_0039", "S19S_0041", "S19S_0042", "S19S_0050",
"S19S_0051", "S19S_0052", "S19S_0053", "S19S_0055", "S19S_0059",
"S19S_0060", "S19S_0062", "S19S_0063", "S19S_0064", "S19S_0068",
"S19S_0069", "S19S_0070", "S19S_0071", "S19S_0072", "S19S_0074",
"S19S_0075", "S19S_0076", "S19S_0077", "S19S_0080", "S19S_0081",
"S19S_0082", "S19S_0083", "S19S_0084", "S19S_0085", "S19S_0086",
"S19S_0087", "S19S_0089", "S19S_0090", "S19S_0098", "S19S_0099",
"S19S_0006", "S19S_0008", "S19S_0009", "S19S_0010", "S19S_0100",
"S19S_0012", "S19S_0015", "S19S_0017", "S19S_0018", "S19S_0019",
"S19S_0021", "S19S_0022", "S19S_0023", "S19S_0025", "S19S_0026",
"S19S_0027", "S19S_0028", "S19S_0029", "S19S_0031", "S19S_0032",
"S19S_0033", "S19S_0034", "S19S_0035", "S19S_0036", "S19S_0037",
"S19S_0038", "S19S_0039", "S19S_0040", "S19S_0041", "S19S_0042",
"S19S_0043", "S19S_0050", "S19S_0051", "S19S_0052", "S19S_0053",
"S19S_0059", "S19S_0060", "S19S_0062", "S19S_0063", "S19S_0064",
"S19S_0069", "S19S_0070", "S19S_0071", "S19S_0072", "S19S_0073",
"S19S_0074", "S19S_0075", "S19S_0076", "S19S_0077", "S19S_0080",
"S19S_0082", "S19S_0083", "S19S_0084", "S19S_0085", "S19S_0086",
"S19S_0087", "S19S_0090", "S19S_0097", "S19S_0098", "S19S_0099",
"S19S_0007", "S19S_0008", "S19S_0009", "S19S_0010", "S19S_0100",
"S19S_0011", "S19S_0012", "S19S_0015", "S19S_0016", "S19S_0017",
"S19S_0018", "S19S_0019", "S19S_0020", "S19S_0022", "S19S_0023",
"S19S_0025", "S19S_0026", "S19S_0027", "S19S_0028", "S19S_0029",
"S19S_0031", "S19S_0032", "S19S_0033", "S19S_0034", "S19S_0035",
"S19S_0036", "S19S_0037", "S19S_0038", "S19S_0039", "S19S_0040",
"S19S_0041", "S19S_0042", "S19S_0043", "S19S_0047", "S19S_0050",
"S19S_0051", "S19S_0052", "S19S_0053", "S19S_0060", "S19S_0062",
"S19S_0063", "S19S_0064", "S19S_0066", "S19S_0069", "S19S_0070",
"S19S_0071", "S19S_0072", "S19S_0073", "S19S_0074", "S19S_0075",
"S19S_0076", "S19S_0077", "S19S_0080", "S19S_0082", "S19S_0083",
"S19S_0084", "S19S_0085", "S19S_0086", "S19S_0090", "S19S_0097",
"S19S_0098", "S19S_0099")), class = "data.frame", row.names = c(NA,
-188L))
> dput(head_npoc_sed)
structure(list(Sample_ID = c("S19S_0067_Sed_Field_ICR-D", "S19S_0072_Sed_Field_ICR-M",
"S19S_0073_Sed_Field_ICR-U", "S19S_0071_Sed_Field_ICR-U", "S19S_0064_Sed_Field_ICR-M",
"S19S_0067_Sed_Field_ICR-U"), X00681_NPOC_mg_per_L_as_C = c("24.32",
"8.9", "20.24", "31.58", "21.81", "29.74"), Samples = c("S19S_0067",
"S19S_0072", "S19S_0073", "S19S_0071", "S19S_0064", "S19S_0067"
), SampleID = c("Sample_S19S_0067_Sed_Field_ICR.D_P2", "Sample_S19S_0072_Sed_Field_ICR.M_P2",
"Sample_S19S_0073_Sed_Field_ICR.U_P2", "Sample_S19S_0071_Sed_Field_ICR.U_P2",
"Sample_S19S_0064_Sed_Field_ICR.M_P2", "Sample_S19S_0067_Sed_Field_ICR.U_P2"
)), row.names = c(NA, 6L), class = "data.frame")
Upvotes: 1
Views: 134
Reputation: 41533
The problem is that there are some duplicates in your factors_sed
and resp
dataframe. An approach is to add a row number within each group of Samples
so that only the first from your resp
dataframe will be joined to the first entry of your factors_sed
. You can use the following code:
library(dplyr)
left_join(factors_sed %>% group_by(Samples) %>% mutate(id = row_number()),
resp %>% group_by(Samples) %>% mutate(id = row_number()),
by = c("Samples", "id")) %>%
left_join(., head_npoc_sed, by = "Samples") %>%
select(-id)
Upvotes: 2
Reputation: 2506
The reason you're getting more rows when you join resp
onto factors_sed
is because there are multiple rows that have the same value in resp$Samples
. Here are some examples:
resp %>%
group_by(Samples) %>%
filter(n() > 1) %>%
arrange(Samples)
So you must create a data frame that does not have more than one row for every unique value in the Samples
column. Here is one way to do it, but you'll need to evaluate if mean
is the correct aggregating function (e.g., maybe sum
is better?).
resp_duprm <- resp %>%
group_by(Samples) %>%
summarize(rate_mg_per_L_per_h = mean(rate_mg_per_L_per_h), .groups = 'drop')
The same issue can be seen in npoc_sed
, you have more than one row for the same value in npoc_sed$Samples
. You only provided the first five rows, so I imagine there are many other examples than just the one here:
npoc_sed %>%
group_by(Samples) %>%
filter(n() > 1) %>%
arrange(Samples)
You can use a similar approach to the above, you just need to aggregate the data so there is only one row per unique value in the Samples
column. I had to drop the Sample_ID
, and SampleID
columns. Keeping them would not have given us one row per unique value in Sample
. Also, I chose mean
again, but another function might be more appropriate:
npoc_sed_duprm <- npoc_sed %>%
mutate(X00681_NPOC_mg_per_L_as_C = as.numeric(X00681_NPOC_mg_per_L_as_C)) %>% # had to convert to numeric, it came as character in the dput()
group_by(Samples) %>%
summarize(X00681_NPOC_mg_per_L_as_C = mean(X00681_NPOC_mg_per_L_as_C), .groups = 'drop')
Once you have resp_duprm
and npoc_sed_duprm
it's simply a matter of using left_join
.
factors_sed %>%
left_join(resp_duprm, by = 'Samples') %>%
left_join(npoc_sed_duprm, by = 'Samples')
Upvotes: 2