ansa
ansa

Reputation: 11

How to perform a calculation for all columns of a matrix?

I have a matrix with tree ring width for 66 trees. Rows represent the years of measurement and columns represent the different trees.

Now I want to calculate the growth change from one year to the next, tree by tree. I managed to do that with the "lag" function from dplyr for each column separately. That's a lot of code to write for 66 columns, so I am looking for a way to do it for all columns at once, column by column.

my 'treegrowth' data.frame looks like this:

year tree1 tree2 tree3 ...
1900 0.72 0.34 1.34
1901 0.56 0.88 0.98
1902 1.23 0.56 1.67
...

For each tree I want to divide the value of one year by the value of the previous year and replace the old value with the result.

I can do that for each column like this:

  treegrowth$tree1 <- treegrowth$tree1 / lag(treegrowth$tree1)

But how can I manage this in one step for all trees (columns)?

If I just leave out the '$tree1', something strange happens, but it is not the result I want.

> dput(head(treegrowth))
structure(list(WA12M = c(NA_real_, NA_real_, NA_real_, NA_real_, 
NA_real_, NA_real_), WA81M = c(NA_real_, NA_real_, NA_real_, 
NA_real_, NA_real_, NA_real_), WA101M = c(NA_real_, NA_real_, 
NA_real_, NA_real_, NA_real_, NA_real_), HA263M = c(NA_real_, 
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), HA358M = c(NA_real_, 
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), HA386M = c(NA_real_, 
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), HA387M = c(NA_real_, 
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), HA388M = c(0.73, 
0.73, 0.84, 0.43, 0.67, 0.72), HA390M = c(NA_real_, NA_real_, 
NA_real_, NA_real_, NA_real_, NA_real_), HA420M = c(NA_real_, 
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), MI49M = c(NA_real_, 
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), MI51M = c(NA_real_, 
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), MI62M = c(NA_real_, 
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), MI309M = c(NA_real_, 
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), NO4M = c(NA_real_, 
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), NO8M = c(NA_real_, 
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), NO23M = c(NA_real_, 
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), NO42M = c(NA_real_, 
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), NO47M = c(NA_real_, 
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), NO50M = c(NA_real_, 
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), NO73M = c(NA_real_, 
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), WA3M = c(NA_real_, 
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), WA18M = c(NA_real_, 
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), WA22M = c(NA_real_, 
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), WA25M = c(NA_real_, 
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), WA26M = c(NA_real_, 
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), WA27M = c(NA_real_, 
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), WA28M = c(NA_real_, 
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), WA38M = c(NA_real_, 
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), WA40M = c(NA_real_, 
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), WA48M = c(NA_real_, 
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), WA64M = c(NA_real_, 
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), WA80M = c(NA_real_, 
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), WA84M = c(NA_real_, 
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), WA88M = c(NA_real_, 
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), WA90M = c(NA_real_, 
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), WA93M = c(NA_real_, 
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), WA95M = c(NA_real_, 
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), WA103M = c(NA_real_, 
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), WA104M = c(NA_real_, 
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), WA111M = c(NA_real_, 
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), WA141M = c(NA_real_, 
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), WA142M = c(NA_real_, 
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), WA154M = c(NA_real_, 
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), WA163M = c(NA_real_, 
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), WA164M = c(NA_real_, 
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), WA177M = c(NA_real_, 
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), WA194M = c(NA_real_, 
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), WA195M = c(NA_real_, 
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), WA196M = c(NA_real_, 
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), WA197M = c(NA_real_, 
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), WA198M = c(NA_real_, 
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), WA200M = c(NA_real_, 
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), WA202M = c(NA_real_, 
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), WA205M = c(NA_real_, 
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), WA206M = c(NA_real_, 
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), WA207M = c(NA_real_, 
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), WA225M = c(NA_real_, 
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), WA252M = c(NA_real_, 
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), WA291M = c(NA_real_, 
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), WA294M = c(NA_real_, 
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), WA297M = c(NA_real_, 
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), WA299M = c(NA_real_, 
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), WA376M = c(NA_real_, 
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), WA379M = c(NA_real_, 
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), WA395M = c(NA_real_, 
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), EI33M = c(NA_real_, 
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), EI38M = c(NA_real_, 
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), HA161M = c(NA_real_, 
NA_real_, NA_real_, NA_real_, NA_real_)), row.names = c("1803", 
"1804", "1805", "1806", "1807", "1808"), class = c("rwl", "data.frame"
))

Upvotes: 1

Views: 375

Answers (5)

G. Grothendieck
G. Grothendieck

Reputation: 270055

The zoo package has diff.zoo, which with the arithmetic = FALSE argument does exactly that. Using treegrowth, given reproducibly in the Note at the end, we use the following code, which reads it into a zoo object and then applies diff.

(Note that fortify.zoo will convert a zoo object to a data frame if you need that.)

library(zoo)

# Read the data into a zoo object. In the Note at the end, treegrowth is
# built as a matrix, hence the as.data.frame() conversion first.
z <- read.zoo(as.data.frame(treegrowth))
# With arithmetic = FALSE, diff() yields each value divided by the previous
# one (a ratio) rather than the difference.
zd <- diff(z, arithmetic = FALSE)

giving this zoo object:

zd
##          tree1     tree2     tree3
## 1901 0.7777778 2.5882353 0.7313433
## 1902 2.1964286 0.6363636 1.7040816

library(ggplot2)

autoplot(zd, facet = NULL) +
  geom_point() +
  scale_x_continuous(breaks = time(zd)) +
  xlab("year")

screenshot

Note

Lines <- "year tree1 tree2 tree3
1900 0.72 0.34 1.34
1901 0.56 0.88 0.98
1902 1.23 0.56 1.67"
treegrowth <- as.matrix(read.table(text = Lines, header = TRUE))

Upvotes: 2

Ronak Shah
Ronak Shah

Reputation: 389215

Using base R, we can use lapply

# For every tree column (all columns except the first, "year"), divide each
# value by the previous year's value and store the ratios in new
# "<tree>_growth" columns. The first year has no predecessor, so it is NA.
# The column names must come from `treegrowth` itself; `df` is not the data
# frame here (in a clean session it is the stats::df function).
treegrowth[paste0(names(treegrowth)[-1], "_growth")] <- lapply(
  treegrowth[-1],
  function(x) c(NA, x[-1] / x[-length(x)])
)

#  year tree1 tree2 tree3 tree1_growth tree2_growth tree3_growth
#1 1900  0.72  0.34  1.34           NA           NA           NA
#2 1901  0.56  0.88  0.98    0.7777778    2.5882353    0.7313433
#3 1902  1.23  0.56  1.67    2.1964286    0.6363636    1.7040816

Or if you want to use dplyr and lag, we can use mutate_at

library(dplyr)
# across() supersedes mutate_at(): for every column except the first (year),
# add a "<column>_growth" column holding value / previous value.
treegrowth %>%
  mutate(across(-1, list(growth = ~ .x / lag(.x))))

data

treegrowth <- structure(list(year = 1900:1902, tree1 = c(0.72, 0.56, 1.23), 
tree2 = c(0.34, 0.88, 0.56), tree3 = c(1.34, 0.98, 1.67)), 
class = "data.frame", row.names = c(NA, -3L))

Upvotes: 5

Joseph Crispell
Joseph Crispell

Reputation: 538

You could try the following code:

treeGrowth <- data.frame(
  year = c(1900, 1901, 1902),
  tree1 = c(0.72, 0.56, 1.23),
  tree2 = c(0.34, 0.88, 0.56),
  tree3 = c(1.34, 0.98, 1.67)
)

# For each tree column (everything except "year"), add a "<tree>_growth"
# column holding each year's width divided by the previous year's width.
# The first year has no predecessor, so its growth is NA.
# The row count must come from treeGrowth itself: `data` is a base R
# function, so nrow(data) is NULL and the denominator would be empty.
n_years <- nrow(treeGrowth)
for (column in colnames(treeGrowth)[-1]) {
  current <- treeGrowth[-1, column]
  previous <- treeGrowth[-n_years, column]
  treeGrowth[, paste0(column, "_growth")] <- c(NA, current / previous)
}

print(treeGrowth)

#  year tree1 tree2 tree3 tree1_growth tree2_growth tree3_growth
#1 1900  0.72  0.34  1.34           NA           NA           NA
#2 1901  0.56  0.88  0.98    0.7777778    2.5882353    0.7313433
#3 1902  1.23  0.56  1.67    2.1964286    0.6363636    1.7040816

Upvotes: 0

shs
shs

Reputation: 3899

If you reshape your data to a longer format, this becomes easier:

# Reshape to long format (one row per year/tree combination), then divide
# each width by the previous row's width within the same tree.
# pivot_longer() replaces the superseded gather(); ungroup() keeps the
# grouping from leaking into later steps.
# NOTE(review): assumes the rows of df are ordered by year, so that lag()
# picks up the previous year's value.
df %>%
  pivot_longer(-year, names_to = "tree", values_to = "width") %>%
  group_by(tree) %>%
  mutate(growth = width / lag(width)) %>%
  ungroup()

Upvotes: 0

Boidot
Boidot

Reputation: 373

You could try changing the format of your dataframe to a long format with a melt function (from the reshape2 package). Once in a long format you can perform your calculation easily with a loop.

If your treename becomes your variable you could write something like

# For each tree, replace its widths with the ratio to the previous row's
# width; the first row of each tree has no predecessor and becomes NA.
# `treegrowth_m$tree` would look up a literal column called "tree" rather
# than the loop variable, so the rows of the current tree are selected
# explicitly and the lag is computed within that tree only.
# NOTE(review): assumes the melted data uses the columns `variable` (a
# factor of tree names) and `value` (widths), and that rows within each
# tree are in year order - adjust if your columns are named differently.
for (tree in levels(treegrowth_m$variable)) {
  rows <- which(treegrowth_m$variable == tree)
  width <- treegrowth_m$value[rows]
  treegrowth_m$value[rows] <- width / c(NA, head(width, -1))
}

where treegrowth_m is your melted df

Upvotes: 0

Related Questions