R functions for soccer league tables and result matrix

Here are three R functions I wrote to calculate ranking tables in soccer leagues based on the results of played matches. The functions are made for ordinary leagues where each team plays every other team twice, once at its own home field and once at the opposing team's home field, but the match.results() and league.table() functions can be used on more general data.

The first function, match.results() just computes the outcome of a match (Home, Draw or Away, i.e “H”, “D” or “A”) based on number of goals scored, and is used by the other two functions.

> res <- match.results(c(1,2,1,2,3,1,0,5), c(0,1,2,0,3,0,4,0))
> res
[1] "H" "H" "A" "H" "D" "H" "A" "H"

The league.table() function returns a data.frame with some statistics for each team, such as the number of wins, draws and losses (for both home and away games), goals, goal difference etc. As input it takes vectors with the name of the home team, the name of the away team, the goals scored by the home team and the goals scored by the away team. Three points are given for a win, one point for a draw, and zero points for a loss, as is used in most leagues. If you want to compute an alternative table with a different point scheme you can just change the three variables first in the function body. The teams are ranked by the number of points awarded, but if two or more teams have the same number of points, they are ranked by goal difference. If the goal difference is also equal, the number of goals scored is used.

#load data from football-data.co.uk
matchdata <- read.csv("premierLeague2011-11.csv")
attach(matchdata)
league.table(HomeTeam, AwayTeam, FTHG, FTAG)

            PLD HW HD HL AW AD AL GF GA  GD PTS
Man United   38 18  1  0  5 10  4 78 37  41  80
Chelsea      38 14  3  2  7  5  7 69 33  36  71
Man City     38 13  4  2  8  4  7 60 33  27  71
Arsenal      38 11  4  4  8  7  4 72 43  29  68
Tottenham    38  9  9  1  7  5  7 55 46   9  62
Liverpool    38 12  4  3  5  3 11 59 44  15  58
Everton      38  9  7  3  4  8  7 51 45   6  54
Fulham       38  8  7  4  3  9  7 49 43   6  49
Aston Villa  38  8  7  4  4  5 10 48 59 -11  48
Sunderland   38  7  5  7  5  6  8 45 56 -11  47
West Brom    38  8  6  5  4  5 10 56 71 -15  47
Newcastle    38  6  8  5  5  5  9 56 57  -1  46
Stoke        38 10  4  5  3  3 13 46 48  -2  46
Bolton       38 10  5  4  2  5 12 52 56  -4  46
Blackburn    38  7  7  5  4  3 12 46 59 -13  43
Wigan        38  5  8  6  4  7  8 40 61 -21  42
Wolves       38  8  4  7  3  3 13 46 66 -20  40
Birmingham   38  6  8  5  2  7 10 37 58 -21  39
Blackpool    38  5  5  9  5  4 10 55 78 -23  39
West Ham     38  5  5  9  2  7 10 43 70 -27  33

The last function is result.matrix(), which returns a matrix with the match results, with home teams on the rows and away teams on the columns. The cell contents can be formatted in three different ways using the format argument. By default this is set to “score”, which gives output like “2 - 1”. “HDA” gives either “A”, “D” or “H”. “difference” gives the goal difference (home goals minus away goals). The diagonal consists of “NA”s.

#only the five first rows and columns to save space
result.matrix(m$HomeTeam, m$AwayTeam, m$FTHG, m$FTAG, format="score")[1:5,1:5]

            Arsenal Aston Villa Birmingham Blackburn Blackpool
Arsenal     NA      "1 - 2"     "2 - 1"    "0 - 0"   "6 - 0"  
Aston Villa "2 - 4" NA          "0 - 0"    "4 - 1"   "3 - 2"  
Birmingham  "0 - 3" "1 - 1"     NA         "2 - 1"   "2 - 0"  
Blackburn   "1 - 2" "2 - 0"     "1 - 1"    NA        "2 - 2"  
Blackpool   "1 - 3" "1 - 1"     "1 - 2"    "1 - 2"   NA       

And here is the code for the three functions.

match.results <- function(homeGoals, awayGoals){
  #Classifies every match as a home win ("H"), a draw ("D") or an away win
  #("A") by comparing the goals scored by the two sides.
  #
  #Args:
  #  homeGoals: goals scored by the home team in each match.
  #  awayGoals: goals scored by the away team in each match.
  #
  #Returns:
  #  A character vector with one outcome code per match.

  #start with blank outcomes, one slot per match
  outcome <- character(length(homeGoals))

  #one logical mask per outcome code, applied in the order D, H, A
  masks <- list(D = homeGoals == awayGoals,
                H = homeGoals > awayGoals,
                A = awayGoals > homeGoals)

  for (code in names(masks)){
    outcome[masks[[code]]] <- code
  }

  outcome
}

league.table <- function(homeTeam, awayTeam, homeGoals, awayGoals){
  #Computes a league ranking table from a set of played matches.
  #
  #Args:
  #  homeTeam: name of the home team in each match (character or factor).
  #  awayTeam: name of the away team in each match (character or factor).
  #  homeGoals: goals scored by the home team in each match.
  #  awayGoals: goals scored by the away team in each match.
  #
  #Returns:
  #  A data.frame with one row per team (team names as row names) and the
  #  columns PLD (matches played), HW/HD/HL (home wins/draws/losses),
  #  AW/AD/AL (away wins/draws/losses), GF (goals for), GA (goals against),
  #  GD (goal difference) and PTS (points). Rows are sorted by descending
  #  PTS, then descending GD, then descending GF. With no matches at all an
  #  empty data.frame with the same columns is returned.

  #points awarded for a match outcome
  winPts <- 3
  drawPts <- 1
  loosePts <- 0

  #vapply guarantees an integer vector whatever the inputs are
  inputLengths <- vapply(list(homeTeam, awayTeam, homeGoals, awayGoals),
                         length, integer(1))
  if (length(unique(inputLengths)) != 1 ){
    warning("input vectors not of same length.")
  }

  #work with plain team names so factor inputs are compared by label
  homeTeam <- as.character(homeTeam)
  awayTeam <- as.character(awayTeam)

  #sorted, unique team names
  teams <- levels(factor(c(homeTeam, awayTeam)))
  numTeams <- length(teams)

  #vector with outcome of a match (H, D or A)
  results <- match.results(homeGoals, awayGoals)

  #for output
  homeWins <- numeric(numTeams)
  homeDraws <- numeric(numTeams)
  homeLoss <- numeric(numTeams)
  awayWins <- numeric(numTeams)
  awayDraws <- numeric(numTeams)
  awayLoss <- numeric(numTeams)
  goalsFor <- numeric(numTeams)
  goalsAgainst <- numeric(numTeams)
  goalsDifference <- numeric(numTeams)
  playedMatches <- numeric(numTeams)
  pts <- numeric(numTeams)

  #seq_len() gives an empty loop when there are no teams; 1:numTeams would
  #iterate over c(1, 0) and corrupt the output vectors
  for (t in seq_len(numTeams)) {
    #matches where the given team played at home / away
    playedHome <- homeTeam == teams[t]
    playedAway <- awayTeam == teams[t]

    #match results for the given team
    homeResults <- results[playedHome]
    awayResults <- results[playedAway]

    playedMatches[t] <- length(homeResults) + length(awayResults)

    goalsFor[t] <- sum(awayGoals[playedAway]) + sum(homeGoals[playedHome])
    goalsAgainst[t] <- sum(homeGoals[playedAway]) + sum(awayGoals[playedHome])
    goalsDifference[t] <- goalsFor[t] - goalsAgainst[t]

    #an "A" result is a loss for the home team but a win for the away team
    homeWins[t] <- sum(homeResults == "H")
    homeDraws[t] <- sum(homeResults == "D")
    homeLoss[t] <- sum(homeResults == "A")
    awayWins[t] <- sum(awayResults == "A")
    awayDraws[t] <- sum(awayResults == "D")
    awayLoss[t] <- sum(awayResults == "H")

    totWins <- homeWins[t] + awayWins[t]
    totDraws <- homeDraws[t] + awayDraws[t]
    totLoss <- homeLoss[t] + awayLoss[t]

    pts[t] <- (winPts * totWins) + (drawPts * totDraws) + (loosePts * totLoss)
  }

  #named leagueTable so that base::table is not shadowed
  leagueTable <- data.frame(PLD=playedMatches,
                            HW=homeWins, HD=homeDraws, HL=homeLoss,
                            AW=awayWins, AD=awayDraws, AL=awayLoss,
                            GF=goalsFor, GA=goalsAgainst, GD=goalsDifference,
                            PTS=pts,
                            row.names=teams)

  #rank by points, then goal difference, then goals scored
  ord <- order(-leagueTable$PTS, -leagueTable$GD, -leagueTable$GF)
  leagueTable <- leagueTable[ord, ]

  return(leagueTable)

}
  
result.matrix <- function(homeTeam, awayTeam, homeGoals, awayGoals, format="score"){
  #Builds a square matrix of match results with home teams on the rows and
  #away teams on the columns.
  #
  #Args:
  #  homeTeam: name of the home team in each match (character or factor).
  #  awayTeam: name of the away team in each match (character or factor).
  #  homeGoals: goals scored by the home team in each match.
  #  awayGoals: goals scored by the away team in each match.
  #  format: how a cell is filled. "score" gives strings like "2 - 1",
  #          "HDA" gives the outcome code from match.results(), and
  #          "difference" gives home goals minus away goals.
  #
  #Returns:
  #  A matrix whose row and column names are the sorted team names. Cells
  #  with no corresponding match (including the diagonal) are NA. If the
  #  same pairing occurs more than once, the last match wins.

  #reject unknown formats up front with a clear message
  format <- match.arg(format, c("score", "HDA", "difference"))

  #vapply guarantees an integer vector whatever the inputs are
  inputLengths <- vapply(list(homeTeam, awayTeam, homeGoals, awayGoals),
                         length, integer(1))
  if (length(unique(inputLengths)) != 1 ){
    warning("input vectors not of same length.")
  }

  #A factor used as a subscript selects by its integer codes, not by its
  #labels. Converting to character makes the matrix be indexed by team name
  #even when the two factors have different level sets.
  homeTeam <- as.character(homeTeam)
  awayTeam <- as.character(awayTeam)

  #sorted, unique team names
  teams <- levels(factor(c(homeTeam, awayTeam)))
  numTeams <- length(teams)

  #cell contents for every match, computed once outside the loop
  cells <- switch(format,
                  score = paste(homeGoals, "-", awayGoals),
                  HDA = match.results(homeGoals, awayGoals),
                  difference = homeGoals - awayGoals)

  resultMatrix <- matrix(nrow=numTeams, ncol=numTeams, dimnames=list(teams, teams))

  #seq_along() gives an empty loop when there are no matches
  for (m in seq_along(homeTeam)){
    resultMatrix[homeTeam[m], awayTeam[m]] <- cells[m]
  }

  return(resultMatrix)

}