Alberto Ferrario
Alberto Ferrario

Reputation: 67

How to update a rating sequentially?

Given this simple dataset:

data <- data.frame(ID=seq(1:15),
                   H.team=c("GS","LAC","MIL","CHA","MIL","ATL","TOR","CHA","LAC","GS","TOR","MIL","ATL","CHA","TOR"),
                   A.team=c("MIL","CHA","TOR","ATL","GS","MIL","LAC","GS","TOR","ATL","CHA","LAC","GS","MIL","ATL"),
                   H.pts=c(94,120,91,84,88,96,93,95,113,85,101,116,86,102,90),
                   A.pts=c(84,107,99,75,90,105,87,99,94,87,92,106,84,89,89))
data
   ID H.team A.team H.pts A.pts
1   1     GS    MIL    94    84
2   2    LAC    CHA   120   107
3   3    MIL    TOR    91    99
4   4    CHA    ATL    84    75
5   5    MIL     GS    88    90
6   6    ATL    MIL    96   105
7   7    TOR    LAC    93    87
8   8    CHA     GS    95    99
9   9    LAC    TOR   113    94
10 10     GS    ATL    85    87
11 11    TOR    CHA   101    92
12 12    MIL    LAC   116   106
13 13    ATL     GS    86    84
14 14    CHA    MIL   102    89
15 15    TOR    ATL    90    89

I'm trying to calculate a new rating variable (rat) for each team, the results should be:

   ID H.team A.team H.pts A.pts   h.rbef   a.rbef   h.raft   a.raft
1   1     GS    MIL    94    84 1500.000 1500.000 1508.487 1491.513
2   2    LAC    CHA   120   107 1500.000 1500.000 1510.021 1489.979
3   3    MIL    TOR    91    99 1491.513 1500.000 1481.066 1510.447
4   4    CHA    ATL    84    75 1489.979 1500.000 1498.279 1491.700
5   5    MIL     GS    88    90 1481.066 1508.487 1475.842 1513.711
6   6    ATL    MIL    96   105 1491.700 1475.842 1479.614 1487.928
7   7    TOR    LAC    93    87 1510.447 1510.021 1516.760 1503.708
8   8    CHA     GS    95    99 1498.279 1513.711 1491.164 1520.826
9   9    LAC    TOR   113    94 1503.708 1516.760 1517.357 1503.111
10 10     GS    ATL    85    87 1520.826 1479.614 1514.361 1486.079
11 11    TOR    CHA   101    92 1503.111 1491.164 1510.678 1483.597
12 12    MIL    LAC   116   106 1487.928 1517.357 1497.502 1507.783
13 13    ATL     GS    86    84 1486.079 1514.361 1490.516 1509.924
14 14    CHA    MIL   102    89 1483.597 1497.502 1494.213 1486.886
15 15    TOR    ATL    90    89 1510.678 1490.516 1513.711 1487.483

the first value of rat is 1500 for each team;

after a match is played, the value of rat is updated as follow:

rat.after=rat.before+k*(S-E)

where S = 1 if the team won, 0 otherwise

E is the matchup winning probabilities before the match starts, and is defined by the following function:

win.probs<- function(h.rbef, a.rbef, hca=64) {
  h = 10^(h.rbef/400)
  a = 10^(a.rbef/400)
  hca = 10^(hca/400)
  den = a + hca*h
  h.prob = hca*h / den
  a.prob = a / den
  return(c(h.prob,a.prob))
}
#example (not run): win.probs(1500,1500)

k is a moving constant defined as follow:

rat.k<- function(h.pts,a.pts,h.rbef,a.rbef) {
  ifelse(h.pts-a.pts>0,
         20*(h.pts-a.pts+3)^0.8/(7.5+0.006*(h.rbef-a.rbef)),
         20*(-(h.pts-a.pts)+3)^0.8/(7.5+0.006*(-(h.rbef-a.rbef))))
}
#example (not run): rat.k(94,84,1500,1500)

I wrote the following update function, that works well on a single match:

up.rat<- function(h.pts, a.pts, h.rbef, a.rbef, hca=64) {
    h.prob = win.probs(h.rbef, a.rbef, hca)[1]
    a.prob = win.probs(h.rbef, a.rbef, hca)[2]
    h.win = ifelse(h.pts-a.pts>0,1,0)
    a.win = ifelse(h.pts-a.pts<0,1,0)
    k = rat.k(h.pts,a.pts,h.rbef,a.rbef)
    h.raft = h.rbef + k * (h.win - h.prob) 
    a.raft = a.rbef + k * (a.win - a.prob) 
  return(c(h.rbef,a.rbef,h.raft,a.raft))
}
#example (not run): up.rat(94,84,1500,1500)

and, applying it "manually" to the data I found the results above. For example the first game is GS vs MIL: before playing the match both teams has a rating of 1500, after the game the home team has 1508.487, while the away team has 1491.513 (it's a zero-sum rating). So GS will start next game with this updated rating, same for MIL.

Can someone please help me founding a way to do this "automatically" as my original data has way more than 15 rows? My custom functions seems to work good, what I found really challenging here is to update the rating, because teams don't necessary play a match at home and the following away: the value of rating before is equal to rating after of the previous match both it was played home and away . Note also that the number of matches played is not necessary the same for each team (here for example MIL played 6 matches, LAC 4, and the others 5).

Thanks to anyone who will try to give me any hint or help.

Upvotes: 2

Views: 63

Answers (1)

akrun
akrun

Reputation: 887223

We could create a function

f1 <- function(dat, start_val) {
       dat[c("h.rbef", "a.rbef", "h.raft", "a.raft")] <- start_val
       for(i in seq_len(nrow(data))) {
       
    
        if(i == 1) {
    
           h.rbef <- dat$h.rbef[1]
           a.rbef <- dat$a.rbef[1]
    
    
        } else {
    
          hh.ind <- with(dat, tail(which(H.team[seq_len(i-1)] %in% H.team[i]), 1))
          ha.ind <- with(dat, tail(which(A.team[seq_len(i-1)] %in% H.team[i]), 1))  
      
          aa.ind <- with(dat, tail(which(A.team[seq_len(i-1)] %in% A.team[i]), 1))
          ah.ind <- with(dat, tail(which(H.team[seq_len(i-1)] %in% A.team[i]), 1))
      
          if(length(hh.ind) > 0 & length(ha.ind) > 0 ) {
               ix <- which.max(c(hh.ind, ha.ind))
               mx <- max(hh.ind, ha.ind)
               if(ix == 1) {
                 h.rbef <- dat$h.raft[mx]
           
               } else {
                 h.rbef <- dat$a.raft[mx]
           
               }
      
          } else {
        
                 if(length(hh.ind) > 0) {
            
                 h.rbef <- dat$h.raft[hh.ind]
      
                }   else if(length(ha.ind) > 0) {
            
                 h.rbef <- dat$a.raft[ha.ind]
      
                } else {
      
                 h.rbef <- dat$h.rbef[i]
               }
          }
      
          if(length(aa.ind) > 0 & length(ah.ind) > 0 ) {
               iy <- which.max(c(aa.ind, ah.ind))
               my <- max(aa.ind, ah.ind)
               if(iy == 1) {
                 a.rbef <- dat$a.raft[my]
           
               } else {
                 a.rbef <- dat$h.raft[my]
           
               }
      
          } else {
      
            if(length(aa.ind) > 0) {
      
               a.rbef <- dat$a.raft[aa.ind]
      
                }   else if(length(ah.ind) > 0) {
      
                 a.rbef <- dat$h.raft[ah.ind]
      
                } else {
      
                 a.rbef <- dat$a.rbef[i]
             }
             }
      
      
      
      
    
        }    
    
    
           tmp <- up.rat(dat$H.pts[i], dat$A.pts[i], h.rbef, a.rbef)
            dat[i, c("h.rbef", "a.rbef", "h.raft", "a.raft")] <- tmp
       }
       return(dat)


}



-testing

out <- f1(data, 1500)

-output

out
#   ID H.team A.team H.pts A.pts   h.rbef   a.rbef   h.raft   a.raft
#1   1     GS    MIL    94    84 1500.000 1500.000 1508.487 1491.513
#2   2    LAC    CHA   120   107 1500.000 1500.000 1510.021 1489.979
#3   3    MIL    TOR    91    99 1491.513 1500.000 1481.066 1510.447
#4   4    CHA    ATL    84    75 1489.979 1500.000 1498.279 1491.700
#5   5    MIL     GS    88    90 1481.066 1508.487 1475.842 1513.711
#6   6    ATL    MIL    96   105 1491.700 1475.842 1479.614 1487.928
#7   7    TOR    LAC    93    87 1510.447 1510.021 1516.760 1503.708
#8   8    CHA     GS    95    99 1498.279 1513.711 1491.164 1520.826
#9   9    LAC    TOR   113    94 1503.708 1516.760 1517.357 1503.111
#10 10     GS    ATL    85    87 1520.826 1479.614 1514.361 1486.079
#11 11    TOR    CHA   101    92 1503.111 1491.164 1510.678 1483.597
#12 12    MIL    LAC   116   106 1487.928 1517.357 1497.501 1507.783
#13 13    ATL     GS    86    84 1486.079 1514.361 1490.516 1509.924
#14 14    CHA    MIL   102    89 1483.597 1497.501 1494.214 1486.885
#15 15    TOR    ATL    90    89 1510.678 1490.516 1513.710 1487.484

Upvotes: 3

Related Questions