# Reshape the match table `s1819` (one row per match) into long format with
# one row per team per match, so goals can be modelled per team.
#
# Columns read from `s1819`: HomeTeam, AwayTeam, FTHG, FTAG.
# Columns produced in `df`:
#   team     - the side whose goals are recorded in this row
#   opponent - the other side in the same match
#   goals    - goals scored by `team` (numeric)
#   home     - 1 if `team` is the HomeTeam of the match, 0 otherwise
#
# Built with whole-column operations instead of apply(): apply() coerces the
# data frame to a matrix first (turning the goal counts into strings when
# mixed with team names), and it needs one data.frame() call per match
# followed by an rbind() of all the pieces.
df <- local({
  n_matches <- nrow(s1819)

  # Each match contributes two consecutive rows: home side first, then away
  # side. This keeps rows in match order, so splitting `df` by row position
  # still splits it by match.
  match_idx <- rep(seq_len(n_matches), each = 2L)
  is_home <- rep(c(TRUE, FALSE), times = n_matches)

  home_team <- as.character(s1819$HomeTeam)[match_idx]
  away_team <- as.character(s1819$AwayTeam)[match_idx]

  # as.character() first, so that a factor column yields its labels rather
  # than its internal integer codes when converted to numeric.
  home_goals <- as.numeric(as.character(s1819$FTHG))[match_idx]
  away_goals <- as.numeric(as.character(s1819$FTAG))[match_idx]

  data.frame(
    team = ifelse(is_home, home_team, away_team),
    opponent = ifelse(is_home, away_team, home_team),
    goals = ifelse(is_home, home_goals, away_goals),
    home = as.numeric(is_home),
    stringsAsFactors = FALSE
  )
})

head(df)

str(df)

# Split `df` into two parts of nrow(df) / 2 rows each, purely by row position:
# the leading rows and the trailing rows.
n_half <- nrow(df) / 2

df_firsthalf <- df[seq_len(n_half), , drop = FALSE]

df_secondhalf <- df[
  seq.int(to = nrow(df), length.out = n_half), ,
  drop = FALSE
]

# Poisson regression with a log link: goals explained by the home indicator
# plus the team and opponent identities. Fitted on the first part only.
model <- glm(
  goals ~ home + team + opponent,
  family = poisson(link = log),
  data = df_firsthalf
)

summary(model)

# Expected number of goals for each side of one fixture:
# Southampton (home = 1) against West Ham (home = 0).
# type = "response" asks predict() for values on the scale of the response
# rather than on the scale of the linear predictor.
home_fixture <- data.frame(
  home = 1,
  team = "Southampton",
  opponent = "West Ham"
)
away_fixture <- data.frame(
  home = 0,
  team = "West Ham",
  opponent = "Southampton"
)

predictHome <- predict(model, newdata = home_fixture, type = "response")
predictAway <- predict(model, newdata = away_fixture, type = "response")

print(predictHome)

print(predictAway)

Is that correct for the prediction part? Will it predict the second half with the way I have done it?

]]>df_firsthalf <- df[1:180,] df_secondhalf <- df[181:380,] or, if you have a date variable in your data frame that is correctly formatted as a Date type, you can do df_first <- df[df$date <= '2018-12-31',] df_second <- df[df$date > '2018-12-31',]

Since you are having problems with such a basic task in R, I would recommend you go through an R-for-beginners tutorial or something similar.

]]>