Christopher Pestana
Christopher Pestana

Reputation: 1

How do I speed up this for loop?

I am having trouble speeding up the following code, which I adapted from Maxwell et al. (2018): https://www.pnas.org/content/115/19/4891.short

# Collect n time-scaled trees from cal3TimePaleoPhy(), keeping only draws in
# which no branch has length zero.
# Requires `tree3` and `ranges` to be loaded already.
n <- 1000
# Preallocate the result list instead of growing it element by element.
ttrees.cal3.trees <- vector("list", n)
for (i in seq_len(n)) {
  # Redraw until the smallest edge length is non-zero. repeat/break replaces
  # the placeholder rtree(10) with zeroed edge lengths, which existed only to
  # make the first `while` test true. NOTE(review): dropping that placeholder
  # changes how many random numbers are consumed, so seeded runs will produce
  # different (equally valid) draws than before.
  repeat {
    candidate <- cal3TimePaleoPhy(
      tree = tree3,
      timeData = ranges,
      brRate = 1.775051,
      extRate = 1.775051,
      sampRate = 0.023,
      ntrees = 1,
      root.max = 1
    )
    if (min(candidate$edge.length) != 0) {
      break
    }
  }
  ttrees.cal3.trees[[i]] <- candidate
}


,FAD,LAD
H_neanderthalenis,0.25,0.04
H_heidelbergensis,0.6,0.1
H_sapiens,0.335,0
H_antecessor,1,0.396
H_naledi_195,0.335,0.236
Asian_H_erectus_153,1.9,0.03
African_H_erectus_DAN5_P1,1.6,1.5
African_H_erectus,1.9,0.03
Georgian_H_erectus,1.8,1.7
H_rudolfensis_97,2.4,1.6
H_habilis_106,2.4,1.8
Au_sediba_93,1.98,1.97
H_floresiensis_LB1,0.74,0.17
Au_africanus_59,3,2.4
P_aethiopicus_67,2.5,2.3
P_bosei_77,2.3,1.4
P_robustus_89,2,1.5
K_platyops,3.54,3
Au_ghari_63,2.5,2.45
Au_afarensis,3.7,3
Au_afarensis_36,3.7,3
Au_afarensis_37,3.7,3
Au_afarensis_38,3.7,3
Au_anamensis,4.2,3.9
Ar_ramidus,4.5,4.3
S_tchadensis,7,7
P_troglodytes_4,7.3,0
P_troglodytes_5,7.3,0
P_troglodytes_6,7.3,0
G_gorilla,8,0


#NEXUS


BEGIN TREES;
    TRANSLATE
[0]     1   'H_neanderthalenis',
[1]     2   'H_heidelbergensis',
[2]     3   'H_sapiens',
[3]     4   'H_antecessor',
[4]     5   'H_naledi_195',
[5]     6   'Asian_H_erectus_153',
[6]     7   'African_H_erectus_DAN5_P1',
[7]     8   'African_H_erectus',
[8]     9   'Georgian_H_erectus',
[9]     10  'H_rudolfensis_97',
[10]    11  'H_habilis_106',
[11]    12  'Au_sediba_93',
[12]    13  'H_floresiensis_LB1',
[13]    14  'Au_africanus_59',
[14]    15  'P_aethiopicus_67',
[15]    16  'P_robustus_89',
[16]    17  'P_bosei_77',
[17]    18  'K_platyops',
[18]    19  'Au_ghari_63',
[19]    20  'Au_afarensis',
[20]    21  'Au_afarensis_36',
[21]    22  'Au_afarensis_37',
[22]    23  'Au_afarensis_38',
[23]    24  'Au_anamensis',
[24]    25  'Ar_ramidus',
[25]    26  'S_tchadensis',
[26]    27  'P_troglodytes_4',
[27]    28  'P_troglodytes_5',
[28]    29  'P_troglodytes_6',
[29]    30  'G_gorilla'
    ;

  

TREE = (((((((((((((((((1,2),(3)),4),5),(13,6)),7,8),9),10),(11,12)),(14,(15,(17,16)))), 18),(19,(22,21,23,20))),24),25),26),(28,29,27)),30);

END;


# Load the tree from the NEXUS file.
tree3 <- read.nexus("third-app.nex")
# Load the age ranges; the first CSV column is used as the row names
# (presumably the taxon labels -- confirm they match the tree's tip labels).
# read.csv() already returns a data.frame with those row names, so no
# additional as.data.frame() conversion is needed.
ranges <- read.csv("time.csv", header = TRUE, row.names = 1)

I am unsure how to rewrite this loop using the apply family of functions. Any assistance would be much appreciated.

Upvotes: 0

Views: 54

Answers (1)

Allan Cameron
Allan Cameron

Reputation: 173858

You could rewrite your code as an lapply as follows:

# Build a list of 1000 trees; each element is the first cal3TimePaleoPhy()
# draw whose smallest edge length is non-zero.
ttrees.cal3.trees <- lapply(seq_len(1000), function(i) {
  # repeat/break avoids creating a placeholder rtree(10) with zeroed edge
  # lengths just to enter the loop. NOTE(review): without that placeholder
  # fewer random numbers are consumed, so seeded results will differ.
  repeat {
    candidate <- cal3TimePaleoPhy(
      tree = tree3,
      timeData = ranges,
      brRate = 1.775051,
      extRate = 1.775051,
      sampRate = 0.023,
      ntrees = 1,
      root.max = 1
    )
    if (min(candidate$edge.length) != 0) {
      break
    }
  }
  candidate
})

However, I doubt this will speed up the calculation much. It is not iterating through a list that is the resource bottleneck here, but the repeated calls to cal3TimePaleoPhy until a random sample is drawn with no zero-length edges. Without having the data you are using, it's not possible to test other ways in which this might be achieved.

Upvotes: 1

Related Questions