R:for loop execution time?

Question

Is there a way to approximate how long a for loop will take to run? I have a loop with about 500,000 iterations which does some basic calculations and its been running for a while now. I'm skeptical that it might be a never-ending loop.

Here is the code:

mod<- function(file, level = 5){
  df<- read.csv(file = file,header = FALSE,sep = "", col.names = c("DateTime","Seq","BP1","BQ1","BO1","AP1","AQ1","AO1","BP2","BQ2","BO2","AP2","AQ2","AO2","BP3","BQ3","BO3","AP3","AQ3","AO3","BP4","BQ4","BO4","AP4","AQ4","AO4","BP5","BQ5","BO5","AP5","AQ5","AO5","BP6","BQ6","BO6","AP6","AQ6","AO6","BP7","BQ7","BO7","AP7","AQ7","AO7","BP8","BQ8","BO8","AP8","AQ8","AO8","BP9","BQ9","BO9","AP9","AQ9","AO9","BP10","BQ10","BO10","AP10","AQ10","AO10","C","Price","Qty","OldPrice","OldQty"))
  df<- df[which(df$DateTime != 0),]
  df$DateTime= as.POSIXct(df$DateTime/(10^9), origin="1970-01-01")    #timestamp conversion
  change = c()
  for(i in 2:nrow(df)){
    if(is.na(df[i,6]) == TRUE){
      change[i] = 0
      next
    } else if(is.na(df[i,63]) == TRUE){
      change[i] = 0
      next
    }
    #browser()
    if(df[i,63] == "N"){
      a = which(df[i,] == df[i,64])
      if(a[1] > 32){
        change[i] = 0
      } else if(a[1] < 32){
        change[i] = a[1]
      }
      change
    }
    #browser()
    if(df[i,63] == "C"){
      a = which(df[i,] == df[i,64])
      if(a[1] > 32){
        change[i] = 0
      }else if(a[1] < 32){
        change[i] = a[1]*-1
      }
      change
    }
    #browser()
    if(df[i,63] == "M"){
      a = which(df[i,] == df[i,64])
      b = which(df[i-1,] == df[i,66])
      if(a[1] > 32 & b[1] > 32){
        change[i] = 0
      } else if(a[1] < 32 & b[1] > 32){
        change[i] = a[1]
      } else if(a[1] < 32 & b[1] < 32){
        change[i] = b[1] - a[1]
      }
      #browser()
      change
    }
    change
  }
  change
}

What I am trying to do is first see what column 63("C") says, if it is "N" or "C" then look at column 64("Price") and locate its position in that row, apart from column 64 itself, and then assign the column number to change[i]. Make it negative if col63 was "C" and positive if col63 was "N"

If column 63("C") says "M" then look at column 66("OldPrice") first and locate it in the previous row i-1. Then locate the price in column 64("Price") in the same row and take the difference between them(the column numbers) and assign it to change[i]

So the output should be a vector of negative or positive integers.

> dput(df[1:20,])
structure(list(DateTime = c(1.448855100369e+18, 1.448855100369e+18, 
1.448855100375e+18, 1.448855100376e+18, 1.448855100378e+18, 1.448855100379e+18, 
1.44885510038e+18, 1.44885510038e+18, 1.44885510038e+18, 1.448855100383e+18, 
1.448855100384e+18, 1.448855100385e+18, 1.448855100385e+18, 1.448855100385e+18, 
1.448855100386e+18, 1.448855100386e+18, 1.448855100386e+18, 1.448855100387e+18, 
1.448855100389e+18, 1.448855100389e+18), Seq = c(92L, 108L, 406L, 
479L, 643L, 722L, 811L, 822L, 828L, 1046L, 1103L, 1171L, 1186L, 
1196L, 1238L, 1249L, 1254L, 1273L, 1333L, 1343L), BP1 = c(80830L, 
80830L, 81100L, 81100L, 81100L, 81100L, 81100L, 81100L, 81100L, 
81100L, 81100L, 81100L, 81100L, 81100L, 81100L, 81100L, 81200L, 
81200L, 81200L, 81200L), BQ1 = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), BO1 = c(1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L), AP1 = c(0L, 83435L, 83435L, 82165L, 82165L, 82165L, 
82165L, 82165L, 82345L, 82345L, 82165L, 82345L, 82345L, 82165L, 
82340L, 82340L, 82340L, 82340L, 82165L, 82340L), AQ1 = c(0L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L), AO1 = c(0L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), BP2 = c(0L, 0L, 80830L, 
80830L, 80830L, 80830L, 80830L, 80835L, 80835L, 80835L, 80835L, 
80835L, 80835L, 80835L, 80835L, 80835L, 81100L, 81100L, 81100L, 
81100L), BQ2 = c(0L, 0L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), BO2 = c(0L, 0L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L
), AP2 = c(0L, 0L, 0L, 83435L, 83200L, 82650L, 82650L, 82650L, 
82650L, 82650L, 82650L, 82650L, 82650L, 82650L, 82650L, 82650L, 
82650L, 82650L, 82650L, 82650L), AQ2 = c(0L, 0L, 0L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), 
    AO2 = c(0L, 0L, 0L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L), BP3 = c(0L, 0L, 0L, 0L, 0L, 
    0L, 0L, 80830L, 80830L, 80830L, 80830L, 80830L, 80830L, 80830L, 
    80830L, 80830L, 80835L, 80835L, 80835L, 80835L), BQ3 = c(0L, 
    0L, 0L, 0L, 0L, 0L, 0L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L, 1L), BO3 = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), AP3 = c(0L, 
    0L, 0L, 0L, 83435L, 83200L, 83200L, 83200L, 83200L, 83200L, 
    83200L, 83200L, 82900L, 82900L, 82900L, 82900L, 82900L, 82900L, 
    82900L, 82900L), AQ3 = c(0L, 0L, 0L, 0L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), AO3 = c(0L, 
    0L, 0L, 0L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L, 1L), BP4 = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
    0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 80830L, 80830L, 80830L, 80830L
    ), BQ4 = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
    0L, 0L, 0L, 0L, 1L, 1L, 1L, 1L), BO4 = c(0L, 0L, 0L, 0L, 
    0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 1L, 1L, 
    1L), AP4 = c(0L, 0L, 0L, 0L, 0L, 83435L, 83430L, 83430L, 
    83430L, 83430L, 83430L, 83430L, 83200L, 83200L, 83200L, 83200L, 
    83200L, 83200L, 83200L, 83200L), AQ4 = c(0L, 0L, 0L, 0L, 
    0L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 
    2L), AO4 = c(0L, 0L, 0L, 0L, 0L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), BP5 = c(0L, 0L, 0L, 
    0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 80035L, 
    80035L, 80035L), BQ5 = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
    0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 1L, 1L), BO5 = c(0L, 
    0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
    0L, 1L, 1L, 1L), AP5 = c(0L, 0L, 0L, 0L, 0L, 0L, 83435L, 
    83435L, 83435L, 83435L, 83435L, 83435L, 83430L, 83430L, 83430L, 
    83430L, 83430L, 83430L, 83430L, 83430L), AQ5 = c(0L, 0L, 
    0L, 0L, 0L, 0L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L), AO5 = c(0L, 0L, 0L, 0L, 0L, 0L, 1L, 1L, 1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), BP6 = c(0L, 
    0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
    0L, 0L, 0L, 0L), BQ6 = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
    0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), BO6 = c(0L, 
    0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
    0L, 0L, 0L, 0L), AP6 = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
    0L, 83500L, 83500L, 83500L, 83435L, 83435L, 83435L, 83435L, 
    83435L, 83435L, 83435L, 83435L), AQ6 = c(0L, 0L, 0L, 0L, 
    0L, 0L, 0L, 0L, 0L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L), AO6 = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), BP7 = c(0L, 0L, 0L, 
    0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
    0L, 0L), BQ7 = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
    0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), BO7 = c(0L, 0L, 
    0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
    0L, 0L, 0L), AP7 = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
    0L, 0L, 0L, 83500L, 83500L, 83500L, 83500L, 83500L, 83500L, 
    83500L, 83500L), AQ7 = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
    0L, 0L, 0L, 0L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), AO7 = c(0L, 
    0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L, 1L), BP8 = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
    0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), BQ8 = c(0L, 
    0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
    0L, 0L, 0L, 0L), BO8 = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
    0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), AP8 = c(0L, 
    0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
    0L, 0L, 0L, 0L), AQ8 = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
    0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), AO8 = c(0L, 
    0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
    0L, 0L, 0L, 0L), BP9 = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
    0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), BQ9 = c(0L, 
    0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
    0L, 0L, 0L, 0L), BO9 = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
    0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), AP9 = c(0L, 
    0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
    0L, 0L, 0L, 0L), AQ9 = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
    0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), AO9 = c(0L, 
    0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
    0L, 0L, 0L, 0L), BP10 = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
    0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), BQ10 = c(0L, 
    0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
    0L, 0L, 0L, 0L), BO10 = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
    0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), AP10 = c(0L, 
    0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
    0L, 0L, 0L, 0L), AQ10 = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
    0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), AO10 = c(0L, 
    0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
    0L, 0L, 0L, 0L), C = structure(c(4L, 4L, 4L, 4L, 4L, 4L, 
    4L, 4L, 3L, 4L, 3L, 3L, 4L, 3L, 3L, 4L, 4L, 4L, 3L, 3L), .Label = c("", 
    "C", "M", "N"), class = "factor"), Price = c(80830L, 83435L, 
    81100L, 82165L, 83200L, 82650L, 83430L, 80835L, 82345L, 83500L, 
    82165L, 82345L, 82900L, 82165L, 82340L, 83200L, 81200L, 80035L, 
    82165L, 82340L), Qty = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), OldPrice = c(NA, 
    NA, NA, NA, NA, NA, NA, NA, 82165L, NA, 82345L, 82165L, NA, 
    82345L, 82165L, NA, NA, NA, 82340L, 82165L), OldQty = c(NA, 
    NA, NA, NA, NA, NA, NA, NA, 1L, NA, 1L, 1L, NA, 1L, 1L, NA, 
    NA, NA, 1L, 1L)), .Names = c("DateTime", "Seq", "BP1", "BQ1", 
"BO1", "AP1", "AQ1", "AO1", "BP2", "BQ2", "BO2", "AP2", "AQ2", 
"AO2", "BP3", "BQ3", "BO3", "AP3", "AQ3", "AO3", "BP4", "BQ4", 
"BO4", "AP4", "AQ4", "AO4", "BP5", "BQ5", "BO5", "AP5", "AQ5", 
"AO5", "BP6", "BQ6", "BO6", "AP6", "AQ6", "AO6", "BP7", "BQ7", 
"BO7", "AP7", "AQ7", "AO7", "BP8", "BQ8", "BO8", "AP8", "AQ8", 
"AO8", "BP9", "BQ9", "BO9", "AP9", "AQ9", "AO9", "BP10", "BQ10", 
"BO10", "AP10", "AQ10", "AO10", "C", "Price", "Qty", "OldPrice", 
"OldQty"), row.names = c(NA, 20L), class = "data.frame")

Roland · Accepted Answer

Here is how I would do this. The only loop needed is to apply which, which should be fast:

#find column matches for price
DF$change <- apply(DF[, 3:62] == DF[,64], 1, which) + 2L
#negative for C
DF$change[DF[,63] == "C"] <- DF$change[DF[,63] == "C"] * (-1)
#column matches for old price in preceding row if M
pos2 <- apply(DF[which(DF[,63] == "M") - 1, 3:62] == DF[DF[,63] == "M",66], 1, which) + 2L
#assign the difference
DF$change[DF[,63] == "M"] <- pos2 - DF$change[DF[,63] == "M"]
DF$change
#[1]  3  6  3  6 12 12 24  9  0 36  0  0 18  0  0 24  3 27  0  0

This assumes that there is always a matching column. If that's not the case wrap which in a function that returns NA if which returns integer(0).

R:for loop execution time?

Answers (1)

Related Questions