Reputation: 2091
I have a data frame that looks like this:
YEAR X1990_lu X2000_lu X2010_lu soil water
1 1990 215.0310 215.0310 215.0310 3.588198 5.287578
2 2007 415.3221 415.3221 415.3221 8.094746 5.788305
3 1994 263.5908 263.5908 263.5908 4.680792 5.408977
4 2010 453.2070 453.2070 453.2070 8.947157 5.883017
5 2012 476.1869 476.1869 476.1869 9.464206 5.940467
6 1981 118.2226 118.2226 118.2226 1.410008 5.045556
7 1998 311.2422 311.2422 311.2422 5.752949 5.528105
8 2011 456.9676 456.9676 456.9676 9.031771 5.892419
9 1999 320.5740 320.5740 320.5740 5.962915 5.551435
10 1995 282.6459 282.6459 282.6459 5.109533 5.456615
11 2013 482.7333 482.7333 482.7333 9.611500 5.956833
12 1995 281.3337 281.3337 281.3337 5.080007 5.453334
13 2003 371.0283 371.0283 371.0283 7.098136 5.677571
14 2000 329.0534 329.0534 329.0534 6.153701 5.572633
15 1983 141.1699 141.1699 141.1699 1.926322 5.102925
I need to set any column with _lu
in the name to NA if the corresponding numeric part of the column name is greater than the value of YEAR for that row. I can do this for each individual column using the code below, where I extract the numeric part of the _lu
column names and make a numeric vector to compare to YEAR. But, can this be done for all the columns by using apply or possibly map statements?
## make example data
set.seed(123)
soil <- runif(15,1,10)
set.seed(123)
water <- runif(15,5,6)
set.seed(123)
X1990_lu <- runif(15,100,500)
set.seed(123)
X2000_lu <- runif(15,100,500)
set.seed(123)
X2010_lu <- runif(15,100,500)
set.seed(123)
YEAR <- as.integer(runif(15,1980,2015))
data <- data.frame(YEAR, X1990_lu, X2000_lu, X2010_lu, soil, water)
# extract the column indices of the landuse columns
lucolsind <- grep("_lu", names(data))
# remove the x from each landuse column name
colnames(data)[lucolsind] <- substring(names(data[,lucolsind]), 2)
# get the column names
lucolnms <- names(data[,lucolsind])
# get the column names as a split list
lucolnms_lst <- strsplit(names(data[,lucolsind]), c("_"))
# extract just the year indicator
luyears <- unlist(lapply(lucolnms_lst, `[[`, 1))
# set the first LU column to NA where year is less than the lu year
data[,lucolsind[1]] <- ifelse(data$YEAR < luyears[1], NA, data[,lucolsind[1]])
Here is how it looks after processing just the first _lu
column
YEAR 1990_lu 2000_lu 2010_lu soil water
1 1990 215.0310 215.0310 215.0310 3.588198 5.287578
2 2007 415.3221 415.3221 415.3221 8.094746 5.788305
3 1994 263.5908 263.5908 263.5908 4.680792 5.408977
4 2010 453.2070 453.2070 453.2070 8.947157 5.883017
5 2012 476.1869 476.1869 476.1869 9.464206 5.940467
6 1981 NA 118.2226 118.2226 1.410008 5.045556
7 1998 311.2422 311.2422 311.2422 5.752949 5.528105
8 2011 456.9676 456.9676 456.9676 9.031771 5.892419
9 1999 320.5740 320.5740 320.5740 5.962915 5.551435
10 1995 282.6459 282.6459 282.6459 5.109533 5.456615
11 2013 482.7333 482.7333 482.7333 9.611500 5.956833
12 1995 281.3337 281.3337 281.3337 5.080007 5.453334
13 2003 371.0283 371.0283 371.0283 7.098136 5.677571
14 2000 329.0534 329.0534 329.0534 6.153701 5.572633
15 1983 NA 141.1699 141.1699 1.926322 5.102925
Upvotes: 0
Views: 61
Reputation: 16121
A 'tidyverse' approach that uses reshaping in order to compare the values of interest with the corresponding date in the column name row by row:
dt = read.table(text = "
YEAR X1990_lu X2000_lu X2010_lu soil water
1 1990 215.0310 215.0310 215.0310 3.588198 5.287578
2 2007 415.3221 415.3221 415.3221 8.094746 5.788305
3 1994 263.5908 263.5908 263.5908 4.680792 5.408977
4 2010 453.2070 453.2070 453.2070 8.947157 5.883017
5 2012 476.1869 476.1869 476.1869 9.464206 5.940467
6 1981 118.2226 118.2226 118.2226 1.410008 5.045556
7 1998 311.2422 311.2422 311.2422 5.752949 5.528105
8 2011 456.9676 456.9676 456.9676 9.031771 5.892419
9 1999 320.5740 320.5740 320.5740 5.962915 5.551435
10 1995 282.6459 282.6459 282.6459 5.109533 5.456615
11 2013 482.7333 482.7333 482.7333 9.611500 5.956833
12 1995 281.3337 281.3337 281.3337 5.080007 5.453334
13 2003 371.0283 371.0283 371.0283 7.098136 5.677571
14 2000 329.0534 329.0534 329.0534 6.153701 5.572633
15 1983 141.1699 141.1699 141.1699 1.926322 5.102925
", header=T)
library(tidyverse)
dt %>%
gather(var,value,-YEAR) %>%
mutate(value = ifelse(YEAR < as.numeric(gsub("\\D", "", var)) & !is.na(as.numeric(gsub("\\D", "", var))), NA, value)) %>%
group_by(YEAR, var) %>%
mutate(id = row_number()) %>%
spread(var, value) %>%
select(-id) %>%
ungroup()
# # A tibble: 15 x 6
# YEAR soil water X1990_lu X2000_lu X2010_lu
# <int> <dbl> <dbl> <dbl> <dbl> <dbl>
# 1 1981 1.41 5.05 NA NA NA
# 2 1983 1.93 5.10 NA NA NA
# 3 1990 3.59 5.29 215. NA NA
# 4 1994 4.68 5.41 264. NA NA
# 5 1995 5.11 5.46 283. NA NA
# 6 1995 5.08 5.45 281. NA NA
# 7 1998 5.75 5.53 311. NA NA
# 8 1999 5.96 5.55 321. NA NA
# 9 2000 6.15 5.57 329. 329. NA
# 10 2003 7.10 5.68 371. 371. NA
# 11 2007 8.09 5.79 415. 415. NA
# 12 2010 8.95 5.88 453. 453. 453.
# 13 2011 9.03 5.89 457. 457. 457.
# 14 2012 9.46 5.94 476. 476. 476.
# 15 2013 9.61 5.96 483. 483. 483.
Upvotes: 0
Reputation: 20095
An option is to use sapply
on column's ending with _lu
. This can be achieved as:
df[,grepl("_lu$",names(df))] <-
sapply(grep("_lu$",names(df), value = TRUE), function(x){
# Convert column names to numeric and compare with YEAR value of that row
x = ifelse(df$YEAR < as.numeric(gsub("X(\\d+)_lu","\\1",x)), NA, df[,x])
x
})
df
# YEAR X1990_lu X2000_lu X2010_lu soil water
# 1 1990 215.0310 NA NA 3.588198 5.287578
# 2 2007 415.3221 415.3221 NA 8.094746 5.788305
# 3 1994 263.5908 NA NA 4.680792 5.408977
# 4 2010 453.2070 453.2070 453.2070 8.947157 5.883017
# 5 2012 476.1869 476.1869 476.1869 9.464206 5.940467
# 6 1981 NA NA NA 1.410008 5.045556
# 7 1998 311.2422 NA NA 5.752949 5.528105
# 8 2011 456.9676 456.9676 456.9676 9.031771 5.892419
# 9 1999 320.5740 NA NA 5.962915 5.551435
# 10 1995 282.6459 NA NA 5.109533 5.456615
# 11 2013 482.7333 482.7333 482.7333 9.611500 5.956833
# 12 1995 281.3337 NA NA 5.080007 5.453334
# 13 2003 371.0283 371.0283 NA 7.098136 5.677571
# 14 2000 329.0534 329.0534 NA 6.153701 5.572633
# 15 1983 NA NA NA 1.926322 5.102925
Data:
df <- read.table(text =
" YEAR X1990_lu X2000_lu X2010_lu soil water
1 1990 215.0310 215.0310 215.0310 3.588198 5.287578
2 2007 415.3221 415.3221 415.3221 8.094746 5.788305
3 1994 263.5908 263.5908 263.5908 4.680792 5.408977
4 2010 453.2070 453.2070 453.2070 8.947157 5.883017
5 2012 476.1869 476.1869 476.1869 9.464206 5.940467
6 1981 118.2226 118.2226 118.2226 1.410008 5.045556
7 1998 311.2422 311.2422 311.2422 5.752949 5.528105
8 2011 456.9676 456.9676 456.9676 9.031771 5.892419
9 1999 320.5740 320.5740 320.5740 5.962915 5.551435
10 1995 282.6459 282.6459 282.6459 5.109533 5.456615
11 2013 482.7333 482.7333 482.7333 9.611500 5.956833
12 1995 281.3337 281.3337 281.3337 5.080007 5.453334
13 2003 371.0283 371.0283 371.0283 7.098136 5.677571
14 2000 329.0534 329.0534 329.0534 6.153701 5.572633
15 1983 141.1699 141.1699 141.1699 1.926322 5.102925",
header = TRUE, stringsAsFactors = FALSE)
Upvotes: 1