Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
"""LICENSE
Copyright 2020 Hermann Krumrey <hermann@krumreyh.com>

This file is part of betbot.

betbot is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.

betbot is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with betbot.  If not, see <http://www.gnu.org/licenses/>.
LICENSE"""
20import os
21from typing import List, Tuple, Dict, Union, Optional, Any
23from betbot.data.OddsPortal import OddsPortal
24from joblib import load, dump
25from numpy import ndarray, array, concatenate
26from betbot.api.Bet import Bet
27from betbot.api.Match import Match
28from betbot.api.ApiConnection import ApiConnection
29from betbot.prediction.Predictor import Predictor
30from betbot.data.FootballDataCoUk import FootballDataUk
31from sklearn.feature_extraction.text import CountVectorizer
32from sklearn.neural_network import MLPRegressor
# noinspection PyAbstractClass
class SKLearnPredictor(Predictor):
    """
    Abstract class that defines how a scikit-learn-based predictor
    should operate.

    The trained state is kept in ``self.model``, a dictionary holding the
    fitted ``"team_vectorizer"`` and ``"regressor"`` objects.
    """

    def __init__(self, api: ApiConnection, league: str, season: int):
        """
        Initializes the scikit-learn model.
        If no model file exists at ``self.model_path`` a new model is
        trained and written there, otherwise the stored model is loaded.
        :param api: The bundesliga-tippspiel API connection
        :param league: The league for which to predict matches
        :param season: The season for which to predict matches
        """
        super().__init__(api, league, season)
        # Lazily populated cache for the ``odds`` property
        self.__odds: \
            Optional[Dict[Tuple[str, str], Tuple[float, float, float]]] = None
        # NOTE(review): model_dir, name() and logger are not defined in this
        # class; presumably provided by Predictor - confirm.
        self.model_path = os.path.join(self.model_dir, self.name() + ".model")
        self.history_path = os.path.join(self.model_dir, "history")
        self.fetcher = FootballDataUk(self.history_path)
        os.makedirs(self.history_path, exist_ok=True)

        if not os.path.isfile(self.model_path):
            self.model: Dict[str, Any] = {}
            self.logger.info("Training model")
            self.train()
            dump(self.model, self.model_path)
        else:
            # SECURITY NOTE: joblib.load unpickles the file and can execute
            # arbitrary code; only load model files from a trusted location.
            self.model = load(self.model_path)

    @property
    def odds(self) -> Dict[Tuple[str, str], Tuple[float, float, float]]:
        """
        Retrieves current odds using a mix of football-data.co.uk and
        oddsportal.com.
        The result is fetched once and cached for subsequent accesses.
        :return: The odds for each match in the selected league
        """
        if self.__odds is None:
            self.__odds = self.fetcher.get_odds()
            # Fewer than 9 entries triggers the oddsportal fallback;
            # presumably 9 is the number of matches per matchday - confirm.
            if len(self.__odds) < 9:
                oddsportal = OddsPortal()
                # Entries from oddsportal replace entries with the same key
                self.__odds.update(oddsportal.get_odds(self.league))
        return self.__odds

    @classmethod
    def regressor(cls) -> MLPRegressor:
        """
        Defines the regressor used during the prediction process
        :return: The (unfitted) regressor
        """
        return MLPRegressor(hidden_layer_sizes=(64,))

    def train(self):
        """
        Trains the prediction model.
        Downloads the match history, fits the team vectorizer on all home
        and away team names, then fits the regressor on the vectorized
        matches. Both fitted objects are stored in ``self.model``.
        """
        self.fetcher.download_history()
        matches = self.fetcher.get_history_matches()

        # The vocabulary is built from two documents: every home team name
        # joined together, and every away team name joined together.
        team_vectorizer = CountVectorizer(binary=True)
        team_vectorizer.fit([
            " ".join(x["home_team"] for x in matches),
            " ".join(x["away_team"] for x in matches)
        ])
        # Must be stored before vectorize() is called, which reads it
        self.model["team_vectorizer"] = team_vectorizer

        regressor = self.regressor()

        inputs = []
        outputs = []
        for match in matches:
            inputs.append(self.vectorize(match))
            outputs.append(
                self.encode_result(match["home_score"], match["away_score"])
            )
        regressor.fit(array(inputs), array(outputs))
        self.model["regressor"] = regressor

    def vectorize(self, match_data: Dict[str, Union[str, float]]) -> ndarray:
        """
        Defines how a match is vectorized.
        The vector is the concatenation of the home team vector, the away
        team vector and the inverted home/draw/away odds.
        :param match_data: The match data to vectorize; must contain the keys
                           home_team, away_team, home_odds, draw_odds and
                           away_odds
        :return: The vector for the match
        """
        team_vectorizer: CountVectorizer = self.model["team_vectorizer"]
        home, away = team_vectorizer.transform([
            match_data["home_team"], match_data["away_team"]
        ]).toarray()
        # Odds are fed to the model as their reciprocals
        odds = array([
            1 / float(match_data["home_odds"]),
            1 / float(match_data["draw_odds"]),
            1 / float(match_data["away_odds"]),
        ])
        return concatenate((home, away, odds))

    def predict_match(self, match: Match) -> Optional[Tuple[int, int]]:
        """
        Predicts the result of a single match using the trained model.
        The lower of the two predicted scores is rounded (and clamped to
        zero, since the regressor's output is unbounded), the other score is
        derived from the predicted goal difference. The returned scores are
        therefore always non-negative integers.
        :param match: The match to predict
        :return: The home goals and away goals or None
                 if no prediction took place
        """
        match_tuple = (match.home_team, match.away_team)
        odds = self.odds.get(match_tuple)
        if odds is None:
            # No odds known for this pairing, so no prediction is possible
            return None

        match_data: Dict[str, Union[str, float]] = {
            "home_team": match.home_team,
            "away_team": match.away_team,
            "home_odds": float(odds[0]),
            "draw_odds": float(odds[1]),
            "away_odds": float(odds[2])
        }
        vector = array([self.vectorize(match_data)])
        results = self.model["regressor"].predict(vector)[0]

        # Convert numpy scalars to Python floats so round() yields ints
        home_raw, away_raw = (float(score) for score in results)
        min_score = min(home_raw, away_raw)
        # A negative lower score would produce negative goals; clamp to 0
        normer = max(0, round(min_score))
        home_score = round(home_raw - min_score + normer)
        away_score = round(away_raw - min_score + normer)
        return home_score, away_score

    # noinspection PyMethodMayBeStatic
    def encode_result(self, home_score: int, away_score: int) -> ndarray:
        """
        Encodes a result vector.
        This implementation performs no scaling and simply packs both
        scores into an array.
        :param home_score: The home score to encode
        :param away_score: The away score to encode
        :return: The encoded result vector
        """
        return array([home_score, away_score])

    # noinspection PyMethodMayBeStatic
    def interpret_results(self, home_result: float, away_result: float) -> \
            Tuple[int, int]:
        """
        Interprets the raw results by rounding each of them individually
        :param home_result: The home goals result
        :param away_result: The away goals result
        :return: The home goals, the away goals
        """
        return round(home_result), round(away_result)

    def predict(self, matches: List[Match]) -> List[Bet]:
        """
        Performs the prediction.
        Matches for which no prediction could be made are skipped.
        :param matches: The matches to predict
        :return: The predictions as Bet objects
        """
        bets = []
        for match in matches:
            prediction = self.predict_match(match)
            if prediction is not None:
                home_goals, away_goals = prediction
                bets.append(Bet(match, home_goals, away_goals))
        return bets