Reputation: 1120
I've got two data.frames
:
First:
> dput(head(tbl_mz))
structure(list(m.z = c(258.1686969, 258.168752, 587.8313625,
587.8425292, 523.2863282, 523.2859396), Measured.mass = c(514.3228408,
514.3229511, 1173.648172, 802.4706732, 1272.645144, 1044.557326
)), .Names = c("m.z", "Measured.mass"), row.names = c(NA, 6L), class = "data.frame")
Second:
> dput(head(tbl_exl))
structure(list(V1 = c(802.4706732, 1272.649209, 1272.646875,
1272.646599, 1272.646521, 1272.645144), V2 = c(NA, NA, NA, NA,
NA, NA), V3 = c(NA, NA, NA, NA, NA, NA), V4 = c(NA, NA, NA, NA,
NA, NA), V5 = c(NA, NA, NA, NA, NA, NA), V6 = structure(c(2L,
2L, 2L, 2L, 2L, 2L), .Label = c("", "Positive"), class = "factor"),
V7 = c(28.7, 29.4, 29.4, 23.8, 28.6, 23.3), V8 = c(30.7,
31.4, 31.4, 25.8, 30.6, 25.3), X = c(NA, NA, NA, NA, NA,
NA), X.1 = c(NA, NA, NA, NA, NA, NA), X.2 = c(NA, NA, NA,
NA, NA, NA)), .Names = c("V1", "V2", "V3", "V4", "V5", "V6",
"V7", "V8", "X", "X.1", "X.2"), row.names = c(NA, 6L), class = "data.frame")
I would like to replace some values from tbl_exl
, column V1
by values from the the other table tbl_mz
. The values from column V1
(tbl_exl) can be found in the column Measured.mass
(tbl_mz) and they should be replaced by the values from the next column m.z
in tbl_mz
data frame.
In another words the values in the V1
should be replaced by the m.z
values.
The problem is that not all values from V1
can't be find in the other data frame. Those which can be find can be deleted or just left like they are.
The output, which I want to get:
> dput(head(tbl_exl_modified))
structure(list(V1 = c(587.8425292, 1272.649209, 1272.646875,
1272.646599, 1272.646521, 523.2863282), V2 = c(NA, NA, NA, NA,
NA, NA), V3 = c(NA, NA, NA, NA, NA, NA), V4 = c(NA, NA, NA, NA,
NA, NA), V5 = c(NA, NA, NA, NA, NA, NA), V6 = structure(c(2L,
2L, 2L, 2L, 2L, 2L), .Label = c("", "Positive"), class = "factor"),
V7 = c(28.7, 29.4, 29.4, 23.8, 28.6, 23.3), V8 = c(30.7,
31.4, 31.4, 25.8, 30.6, 25.3), X = c(NA, NA, NA, NA, NA,
NA), X.1 = c(NA, NA, NA, NA, NA, NA), X.2 = c(NA, NA, NA,
NA, NA, NA)), .Names = c("V1", "V2", "V3", "V4", "V5", "V6",
"V7", "V8", "X", "X.1", "X.2"), row.names = c(NA, 6L), class = "data.frame")
Upvotes: 1
Views: 809
Reputation: 886938
You could try match
. Create numeric indexes based on the match
between the columns ("Measured.mass", "V1") of the two datasets. Remove the NA
values ("indx1", "indxN1") and replace the "V1" values to "m.z" based on these index.
indx <- match(tbl_mz$Measured.mass, tbl_exl$V1)
indx1 <- indx[!is.na(indx)]
indxN <- match(tbl_exl$V1, tbl_mz$Measured.mass)
indxN1 <- indxN[!is.na(indxN)]
tbl_exl$V1[indx1] <- tbl_mz$m.z[indxN1]
identical(tbl_exl, tbl_exl_modified)
#[1] TRUE
Or use left_join
from dplyr
library(dplyr)
tbl_exl1 <- left_join(tbl_exl, tbl_mz, by=c('V1'='Measured.mass')) %>%
mutate(V1= pmax((NA^!is.na(m.z))*V1, m.z,
na.rm=TRUE)) %>%
select(-m.z)
tbl_exl1
# V1 V2 V3 V4 V5 V6 V7 V8 X X.1 X.2
#1 587.8425 NA NA NA NA Positive 28.7 30.7 NA NA NA
#2 1272.6492 NA NA NA NA Positive 29.4 31.4 NA NA NA
#3 1272.6469 NA NA NA NA Positive 29.4 31.4 NA NA NA
#4 1272.6466 NA NA NA NA Positive 23.8 25.8 NA NA NA
#5 1272.6465 NA NA NA NA Positive 28.6 30.6 NA NA NA
#6 523.2863 NA NA NA NA Positive 23.3 25.3 NA NA NA
Upvotes: 3
Reputation: 92282
Here's a solution using data.table
s binary join
library(data.table)
setnames(setDT(tbl_exl), 1, "Measured.mass") # Changing the first column name for the join to work
setkey(tbl_exl, Measured.mass) # Keying tbl_exl by `Measured.mass`
setkey(setDT(tbl_mz), Measured.mass) # Keying tbl_exl by `Measured.mass`
tbl_exl[tbl_mz, Measured.mass := i.m.z][] # Joining and retrieving only matched values from `i.m.z`
# Measured.mass V2 V3 V4 V5 V6 V7 V8 X X.1 X.2
# 1: 587.8425 NA NA NA NA Positive 28.7 30.7 NA NA NA
# 2: 523.2863 NA NA NA NA Positive 23.3 25.3 NA NA NA
# 3: 1272.6465 NA NA NA NA Positive 28.6 30.6 NA NA NA
# 4: 1272.6466 NA NA NA NA Positive 23.8 25.8 NA NA NA
# 5: 1272.6469 NA NA NA NA Positive 29.4 31.4 NA NA NA
# 6: 1272.6492 NA NA NA NA Positive 29.4 31.4 NA NA NA
Upvotes: 3