Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1"""LICENSE 

2Copyright 2020 Hermann Krumrey <hermann@krumreyh.com> 

3 

4This file is part of betbot. 

5 

6betbot is free software: you can redistribute it and/or modify 

7it under the terms of the GNU General Public License as published by 

8the Free Software Foundation, either version 3 of the License, or 

9(at your option) any later version. 

10 

11betbot is distributed in the hope that it will be useful, 

12but WITHOUT ANY WARRANTY; without even the implied warranty of 

13MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

14GNU General Public License for more details. 

15 

16You should have received a copy of the GNU General Public License 

17along with betbot. If not, see <http://www.gnu.org/licenses/>. 

18LICENSE""" 

19 

20import os 

21from typing import List, Tuple, Dict, Union, Optional, Any 

22 

23from betbot.data.OddsPortal import OddsPortal 

24from joblib import load, dump 

25from numpy import ndarray, array, concatenate 

26from betbot.api.Bet import Bet 

27from betbot.api.Match import Match 

28from betbot.api.ApiConnection import ApiConnection 

29from betbot.prediction.Predictor import Predictor 

30from betbot.data.FootballDataCoUk import FootballDataUk 

31from sklearn.feature_extraction.text import CountVectorizer 

32from sklearn.neural_network import MLPRegressor 

33 

34 

35# noinspection PyAbstractClass 

class SKLearnPredictor(Predictor):
    """
    Abstract class that defines how a scikit-learn-based predictor
    should operate.

    On construction the model is either loaded from ``model_path`` or,
    if no model file exists yet, trained on historical match data and
    then written to ``model_path``.
    """

    def __init__(self, api: ApiConnection, league: str, season: int):
        """
        Initializes the scikit-learn model
        :param api: The bundesliga-tippspiel API connection
        :param league: The league for which to predict matches
        :param season: The season for which to predict matches
        """
        super().__init__(api, league, season)
        # Lazily populated cache for the `odds` property
        self.__odds: \
            Optional[Dict[Tuple[str, str], Tuple[float, float, float]]] = None
        self.model_path = os.path.join(self.model_dir, self.name() + ".model")
        self.history_path = os.path.join(self.model_dir, "history")
        self.fetcher = FootballDataUk(self.history_path)
        os.makedirs(self.history_path, exist_ok=True)

        if not os.path.isfile(self.model_path):
            # No stored model yet: train a fresh one and persist it
            self.model: Dict[str, Any] = {}
            self.logger.info("Training model")
            self.train()
            dump(self.model, self.model_path)
        else:
            # NOTE: joblib.load unpickles the file, so the model file must
            # come from a trusted location.
            self.model = load(self.model_path)

    @property
    def odds(self) -> Dict[Tuple[str, str], Tuple[float, float, float]]:
        """
        Retrieves current odds using a mix of football-data.co.uk and
        oddsportal.com
        The odds are fetched once and cached for later accesses.
        :return: The odds for each match in the selected league
        """
        if self.__odds is None:
            self.__odds = self.fetcher.get_odds()
            # Fall back to oddsportal when fewer than 9 matches are covered
            # (presumably the size of one matchday - TODO confirm).
            # Entries from oddsportal overwrite existing ones for the same
            # (home, away) key.
            if len(self.__odds) < 9:
                oddsportal = OddsPortal()
                self.__odds.update(oddsportal.get_odds(self.league))
        return self.__odds

    @classmethod
    def regressor(cls) -> MLPRegressor:
        """
        Defines the regressor used during the prediction process
        :return: The (untrained) regressor
        """
        return MLPRegressor(hidden_layer_sizes=(64,))

    def train(self) -> None:
        """
        Trains the prediction model
        Downloads the match history, fits the team name vectorizer and the
        regressor and stores both in ``self.model`` under the keys
        ``"team_vectorizer"`` and ``"regressor"``.
        """
        self.fetcher.download_history()
        matches = self.fetcher.get_history_matches()

        # The vocabulary is built from every home and away team name
        # found in the history
        team_vectorizer = CountVectorizer(binary=True)
        team_vectorizer.fit([
            " ".join([x["home_team"] for x in matches]),
            " ".join([x["away_team"] for x in matches])
        ])
        # Must be stored before vectorize() is called below, as
        # vectorize() reads the vectorizer from self.model
        self.model["team_vectorizer"] = team_vectorizer

        regressor = self.regressor()
        inputs = [self.vectorize(match) for match in matches]
        outputs = [
            self.encode_result(match["home_score"], match["away_score"])
            for match in matches
        ]
        regressor.fit(array(inputs), array(outputs))
        self.model["regressor"] = regressor

    def vectorize(self, match_data: Dict[str, Union[str, float]]) -> ndarray:
        """
        Defines how a match is vectorized
        The vector consists of the encoded home team, the encoded away
        team and the inverted home/draw/away odds, in that order.
        :param match_data: The match data to vectorize. Must contain the
                           keys "home_team", "away_team", "home_odds",
                           "draw_odds" and "away_odds"
        :return: The vector for the match
        """
        team_vectorizer: CountVectorizer = self.model["team_vectorizer"]
        home, away = team_vectorizer.transform([
            match_data["home_team"], match_data["away_team"]
        ]).toarray()
        # Inverse of the quoted odds (the bookmaker's implied probability,
        # assuming these are decimal odds - TODO confirm)
        odds = array([
            1 / float(match_data["home_odds"]),
            1 / float(match_data["draw_odds"]),
            1 / float(match_data["away_odds"]),
        ])
        return concatenate((home, away, odds))

    def predict_match(self, match: Match) -> Optional[Tuple[int, int]]:
        """
        Predicts the result of a single match using the trained model
        :param match: The match to predict
        :return: The home goals and away goals (both non-negative ints)
                 or None if no prediction took place because no odds are
                 available for the match
        """
        odds = self.odds.get((match.home_team, match.away_team))
        if odds is None:
            return None

        match_data: Dict[str, Union[str, float]] = {
            "home_team": match.home_team,
            "away_team": match.away_team,
            "home_odds": float(odds[0]),
            "draw_odds": float(odds[1]),
            "away_odds": float(odds[2])
        }
        vector = array([self.vectorize(match_data)])
        raw_home, raw_away = self.model["regressor"].predict(vector)[0]

        # Round the lower score first and shift both scores by the same
        # amount, so that the predicted goal difference is rounded only
        # once instead of rounding both scores independently.
        min_score = min(raw_home, raw_away)
        # The regressor's output is unbounded and may be negative; clamping
        # the base at zero guarantees valid (non-negative) goal counts
        # while keeping the predicted goal difference intact.
        base = max(0, int(round(min_score)))
        # int() ensures plain Python ints even if round() hands back a
        # NumPy scalar.
        home_score = int(round(raw_home - min_score + base))
        away_score = int(round(raw_away - min_score + base))
        return home_score, away_score

    # noinspection PyMethodMayBeStatic
    def encode_result(self, home_score: int, away_score: int) -> ndarray:
        """
        Encodes a result vector
        This is done as a normalization step.
        :param home_score: The home score to encode
        :param away_score: The away score to encode
        :return: The encoded result vector
        """
        return array([home_score, away_score])

    # noinspection PyMethodMayBeStatic
    def interpret_results(self, home_result: float, away_result: float) -> \
            Tuple[int, int]:
        """
        Interprets the raw results
        :param home_result: The home goals result
        :param away_result: The away goals result
        :return: The home goals, the away goals
        """
        return round(home_result), round(away_result)

    def predict(self, matches: List[Match]) -> List[Bet]:
        """
        Performs the prediction
        Matches for which no prediction could be made are skipped.
        :param matches: The matches to predict
        :return: The predictions as Bet objects
        """
        bets = []
        for match in matches:
            prediction = self.predict_match(match)
            if prediction is None:
                continue
            home_goals, away_goals = prediction
            bets.append(Bet(match, home_goals, away_goals))
        return bets

191 return bets