From 2dc40df55e07a7d170c6827896a62bc9911829f0 Mon Sep 17 00:00:00 2001 From: Robert Clark Date: Tue, 24 Dec 2019 16:13:47 -0600 Subject: [PATCH] Add game summary for MLB boxscores A game summary including an inning-by-inning score should be included as an attribute to the MLB Boxscores class, which returns a dictionary of both the home and away team's score per half, including any extra inning results. Signed-Off-By: Robert Clark --- sportsreference/mlb/boxscore.py | 64 +++++++++++++++++++ sportsreference/mlb/constants.py | 1 + .../integration/boxscore/test_mlb_boxscore.py | 4 ++ tests/unit/test_mlb_boxscore.py | 25 ++++++++ 4 files changed, 94 insertions(+) diff --git a/sportsreference/mlb/boxscore.py b/sportsreference/mlb/boxscore.py index 672a0053..ec20d370 100644 --- a/sportsreference/mlb/boxscore.py +++ b/sportsreference/mlb/boxscore.py @@ -351,6 +351,7 @@ def __init__(self, uri): self._duration = None self._away_name = None self._home_name = None + self._summary = None self._winner = None self._winning_name = None self._winning_abbr = None @@ -502,6 +503,50 @@ def _parse_game_date_and_location(self, boxscore): setattr(self, '_time_of_day', time_of_day) setattr(self, '_venue', venue) + def _parse_summary(self, boxscore): + """ + Find the game summary including score in each inning. + + The game summary provides further information on the runs scored + during each inning, including the final score and any extra innings if + applicable. The final output will be in a dictionary with two keys, + 'away' and 'home'. The value of each key will be a list for each + respective team's score by order of the inning, with the first element + belonging to the first inning, similar to the following: + + { + 'away': [0, 0, 0, 0, 0, 0, 0, 3, 1, 2], + 'home': [0, 1, 0, 0, 1, 0, 0, 0, 0, 0] + } + + Parameters + ---------- + boxscore : PyQuery object + A PyQuery object containing all of the HTML from the boxscore. 
+ + Returns + ------- + dict + Returns a ``dictionary`` representing the score for each team in + each inning of the game. + """ + team = ['away', 'home'] + summary = {'away': [], 'home': []} + game_summary = boxscore(BOXSCORE_SCHEME['summary']) + for ind, team_info in enumerate(game_summary('tr').items()): + ind = (ind + 1) % 2 + # Skip the last three cells: they hold each team's total runs, + # hits, and errors, which are already stored in attributes and + # shouldn't be duplicated. + for inning in list(team_info('td[class="center"]').items())[:-3]: + if inning('div'): + continue + try: + summary[team[ind]].append(int(inning.text())) + except ValueError: + summary[team[ind]].append(None) + return summary + def _parse_name(self, field, boxscore): """ Retrieve the team's complete name tag. @@ -765,6 +810,10 @@ def _parse_game_data(self, uri): value = self._parse_name(short_field, boxscore) setattr(self, field, value) continue + if short_field == 'summary': + value = self._parse_summary(boxscore) + setattr(self, field, value) + continue index = 0 if short_field in BOXSCORE_ELEMENT_INDEX.keys(): index = BOXSCORE_ELEMENT_INDEX[short_field] @@ -938,6 +987,21 @@ def time_of_day(self): return NIGHT return DAY + @property + def summary(self): + """ + Returns a ``dictionary`` with two keys, 'away' and 'home'. 
The value of + each key will be a list for each respective team's score by order of + the inning, with the first element belonging to the first inning, + similar to the following: + + { + 'away': [5, 0, 1, 0, 0, 0, 0, 1, 0], + 'home': [1, 0, 0, 0, 1, 0, 0, 0, 0] + } + """ + return self._summary + @property def winner(self): """ diff --git a/sportsreference/mlb/constants.py b/sportsreference/mlb/constants.py index fe73efcf..72ddb865 100644 --- a/sportsreference/mlb/constants.py +++ b/sportsreference/mlb/constants.py @@ -144,6 +144,7 @@ 'away_name': 'a[itemprop="name"]:first', 'home_name': 'a[itemprop="name"]:last', 'winner': 'td[data-stat=""]', + 'summary': 'table[class="linescore nohover stats_table no_freeze"]', 'winning_name': 'td[data-stat=""]', 'winning_abbr': 'td[data-stat=""]', 'losing_name': 'td[data-stat=""]', diff --git a/tests/integration/boxscore/test_mlb_boxscore.py b/tests/integration/boxscore/test_mlb_boxscore.py index ab00ad53..71a48f0b 100644 --- a/tests/integration/boxscore/test_mlb_boxscore.py +++ b/tests/integration/boxscore/test_mlb_boxscore.py @@ -139,6 +139,10 @@ def setup_method(self, *args, **kwargs): def test_mlb_boxscore_returns_requested_boxscore(self): for attribute, value in self.results.items(): assert getattr(self.boxscore, attribute) == value + assert getattr(self.boxscore, 'summary') == { + 'away': [5, 0, 1, 0, 0, 0, 0, 1, 0], + 'home': [1, 0, 0, 0, 1, 0, 0, 0, 0] + } def test_invalid_url_yields_empty_class(self): flexmock(Boxscore) \ diff --git a/tests/unit/test_mlb_boxscore.py b/tests/unit/test_mlb_boxscore.py index 47890c02..26fcc3ec 100644 --- a/tests/unit/test_mlb_boxscore.py +++ b/tests/unit/test_mlb_boxscore.py @@ -169,6 +169,31 @@ def test_losing_abbr_is_away(self): assert self.boxscore.losing_abbr == expected_name + def test_game_summary_with_no_scores_returns_none(self): + result = Boxscore(None)._parse_summary(pq( + """ + + + + + + + + + + + + + + +
""" + )) + + assert result == { + 'away': [None], + 'home': [None] + } + @patch('requests.get', side_effect=mock_pyquery) def test_invalid_url_returns_none(self, *args, **kwargs): result = Boxscore(None)._retrieve_html_page('')