Skip to content

Commit

Permalink
Enable NBA Team to be directly accessible
Browse files Browse the repository at this point in the history
Instead of requiring users to go through the Teams class to get a
specific team, the NBA modules now enable a specific team to be directly
queried by using the Team class. This reduces computational complexity
by removing the need to instantiate every team while also making it more
intuitive for users.

Signed-Off-By: Robert Clark <robdclark@outlook.com>
  • Loading branch information
roclark committed Apr 10, 2020
1 parent c5ea9e8 commit 85c23d7
Show file tree
Hide file tree
Showing 4 changed files with 158 additions and 79 deletions.
11 changes: 11 additions & 0 deletions docs/nba.rst
Original file line number Diff line number Diff line change
Expand Up @@ -181,6 +181,17 @@ number of shots they've blocked, and much more.
print(team.name) # Prints the team's name
print(team.blocks) # Prints the team's total blocked shots
A team can also be requested directly by calling the ``Team`` class, which
returns a ``Team`` instance identical to each element yielded in the loop
above. To request a specific team, pass the team's 3-letter abbreviation when
calling the ``Team`` class.

.. code-block:: python
from sportsreference.nba.teams import Team
houston = Team('HOU')
Each Team instance contains a link to the ``Schedule`` class which enables easy
iteration over all games for a particular team. A Pandas DataFrame can also be
queried to easily grab all stats for all games.
Expand Down
80 changes: 80 additions & 0 deletions sportsreference/nba/nba_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
from .constants import PARSING_SCHEME, SEASON_PAGE_URL
from pyquery import PyQuery as pq
from sportsreference import utils


def _add_stats_data(teams_list, team_data_dict):
"""
Add a team's stats row to a dictionary.
Pass table contents and a stats dictionary of all teams to accumulate all
stats for each team in a single variable.
Parameters
----------
teams_list : generator
A generator of all row items in a given table.
team_data_dict : {str: {'data': str, 'rank': int}} dictionary
A dictionary where every key is the team's abbreviation and every value
is another dictionary with a 'data' key which contains the string
version of the row data for the matched team, and a 'rank' key which is
the rank of the team.
Returns
-------
dictionary
An updated version of the team_data_dict with the passed table row
information included.
"""
# Teams are listed in terms of rank with the first team being #1
rank = 1
for team_data in teams_list:
abbr = utils._parse_field(PARSING_SCHEME, team_data, 'abbreviation')
try:
team_data_dict[abbr]['data'] += team_data
except KeyError:
team_data_dict[abbr] = {'data': team_data, 'rank': rank}
rank += 1
return team_data_dict


def _retrieve_all_teams(year):
    """
    Collect the stats rows of every team for the given season.

    Loads the season page, reads the team and opponent stats tables, and
    merges the rows of both tables into a single dictionary indexed by team
    abbreviation. No Team instances are created here; callers build them from
    the returned dictionary.

    Parameters
    ----------
    year : string
        The requested year to pull stats from. If falsy, the year is
        determined with ``utils._find_year_for_season('nba')``.

    Returns
    -------
    tuple
        Returns a ``tuple`` of the team_data_dict and year which represent all
        stats for all teams, and the year that was actually used to pull
        stats, respectively. If neither stats table is found,
        ``utils._no_data_found()`` is called and ``(None, None)`` is returned.
    """
    if not year:
        year = utils._find_year_for_season('nba')
        # Right before a new season begins, the page for the requested season
        # may not exist yet. In that case, fall back to the previous year,
        # but only if a page exists for it.
        if not utils._url_exists(SEASON_PAGE_URL % year):
            previous_year = str(int(year) - 1)
            if utils._url_exists(SEASON_PAGE_URL % previous_year):
                year = previous_year

    season_page = pq(SEASON_PAGE_URL % year)
    team_rows = utils._get_stats_table(season_page,
                                       'div#all_team-stats-base')
    opponent_rows = utils._get_stats_table(season_page,
                                           'div#all_opponent-stats-base')
    if not (team_rows or opponent_rows):
        utils._no_data_found()
        return None, None

    # Team rows go first so each team's rank comes from the team table.
    team_data_dict = {}
    team_data_dict = _add_stats_data(team_rows, team_data_dict)
    team_data_dict = _add_stats_data(opponent_rows, team_data_dict)
    return team_data_dict, year
138 changes: 60 additions & 78 deletions sportsreference/nba/teams.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,7 @@
import pandas as pd
import re
from .constants import PARSING_SCHEME, SEASON_PAGE_URL
from pyquery import PyQuery as pq
from .constants import PARSING_SCHEME
from ..decorators import float_property_decorator, int_property_decorator
from .nba_utils import _retrieve_all_teams
from .. import utils
from .roster import Roster
from .schedule import Schedule
Expand All @@ -16,19 +15,26 @@ class Team:
name, and abbreviation, and sets them as properties which can be directly
read from for easy reference.
If calling directly, the team's abbreviation needs to be passed. Otherwise,
the Teams class will handle all arguments.
Parameters
----------
team_data : string
team_name : string (optional)
The name of the team to pull if being called directly.
team_data : string (optional)
A string containing all of the rows of stats for a given team. If
multiple tables are being referenced, this will be comprised of
multiple rows in a single string.
rank : int
multiple rows in a single string. Is only used when called directly
from the Teams class.
rank : int (optional)
A team's position in the league based on the number of points they
obtained during the season.
obtained during the season. Is only used when called directly from the
Teams class.
year : string (optional)
The requested year to pull stats from.
"""
def __init__(self, team_data, rank, year=None):
def __init__(self, team_name=None, team_data=None, rank=None, year=None):
self._year = year
self._rank = rank
self._abbreviation = None
Expand Down Expand Up @@ -78,8 +84,38 @@ def __init__(self, team_data, rank, year=None):
self._opp_personal_fouls = None
self._opp_points = None

if team_name:
team_data = self._retrieve_team_data(year, team_name)
self._parse_team_data(team_data)

def _retrieve_team_data(self, year, team_name):
    """
    Pull all stats for a specific team.

    Retrieves the dictionary holding the information of every team in the
    league for the requested year, then picks out the entry of the desired
    team. The year that was actually used and the team's rank are stored on
    the instance as ``_year`` and ``_rank``.

    Parameters
    ----------
    year : string
        A ``string`` of the requested year to pull stats from.
    team_name : string
        A ``string`` of the team's 3-letter abbreviation, such as 'HOU' for
        the Houston Rockets.

    Returns
    -------
    The value stored under the team's 'data' key, which holds all stats and
    information for the specified team.

    Raises
    ------
    KeyError
        If team_name is not a key of the retrieved dictionary.
    """
    all_teams, resolved_year = _retrieve_all_teams(year)
    self._year = resolved_year
    # NOTE(review): _retrieve_all_teams returns (None, None) when no data is
    # found, which makes the lookup below raise a TypeError - confirm whether
    # a clearer error is wanted for that case.
    team_entry = all_teams[team_name]
    stats = team_entry['data']
    self._rank = team_entry['rank']
    return stats

def _parse_team_data(self, team_data):
"""
Parses a value for every attribute.
Expand Down Expand Up @@ -588,7 +624,8 @@ class Teams:
def __init__(self, year=None):
self._teams = []

self._retrieve_all_teams(year)
team_data_dict, year = _retrieve_all_teams(year)
self._instantiate_teams(team_data_dict, year)

def __getitem__(self, abbreviation):
"""
Expand Down Expand Up @@ -650,83 +687,28 @@ def __len__(self):
"""Returns the number of NBA teams for a given season."""
return len(self.__repr__())

def _add_stats_data(self, teams_list, team_data_dict):
def _instantiate_teams(self, team_data_dict, year):
"""
Add a team's stats row to a dictionary.
Create a Team instance for all teams.
Pass table contents and a stats dictionary of all teams to accumulate
all stats for each team in a single variable.
Parameters
----------
teams_list : generator
A generator of all row items in a given table.
team_data_dict : {str: {'data': str, 'rank': int}} dictionary
A dictionary where every key is the team's abbreviation and every
value is another dictionary with a 'data' key which contains the
string version of the row data for the matched team, and a 'rank'
key which is the rank of the team.
Returns
-------
dictionary
An updated version of the team_data_dict with the passed table row
information included.
"""
# Teams are listed in terms of rank with the first team being #1
rank = 1
for team_data in teams_list:
abbr = utils._parse_field(PARSING_SCHEME,
team_data,
'abbreviation')
try:
team_data_dict[abbr]['data'] += team_data
except KeyError:
team_data_dict[abbr] = {'data': team_data, 'rank': rank}
rank += 1
return team_data_dict

def _retrieve_all_teams(self, year):
"""
Find and create Team instances for all teams in the given season.
For a given season, parses the specified NBA stats table and finds all
requested stats. Each team then has a Team instance created which
includes all requested stats and a few identifiers, such as the team's
name and abbreviation. All of the individual Team instances are added
to a list.
Note that this method is called directly once Teams is invoked and does
not need to be called manually.
Once all team information has been pulled from the various webpages,
create a Team instance for each team and append it to a larger list of
team instances for later use.
Parameters
----------
team_data_dict : dictionary
A ``dictionary`` containing all stats information in HTML format as
well as team rankings, indexed by team abbreviation.
year : string
The requested year to pull stats from.
"""
team_data_dict = {}

if not year:
year = utils._find_year_for_season('nba')
# If stats for the requested season do not exist yet (as is the
# case right before a new season begins), attempt to pull the
# previous year's stats. If it exists, use the previous year
# instead.
if not utils._url_exists(SEASON_PAGE_URL % year) and \
utils._url_exists(SEASON_PAGE_URL % str(int(year) - 1)):
year = str(int(year) - 1)
doc = pq(SEASON_PAGE_URL % year)
teams_list = utils._get_stats_table(doc, 'div#all_team-stats-base')
opp_teams_list = utils._get_stats_table(doc,
'div#all_opponent-stats-base')
if not teams_list and not opp_teams_list:
utils._no_data_found()
A ``string`` of the requested year to pull stats from.
"""
if not team_data_dict:
return
for stats_list in [teams_list, opp_teams_list]:
team_data_dict = self._add_stats_data(stats_list, team_data_dict)

for team_data in team_data_dict.values():
team = Team(team_data['data'], team_data['rank'], year)
team = Team(team_data=team_data['data'],
rank=team_data['rank'],
year=year)
self._teams.append(team)

@property
Expand Down
8 changes: 7 additions & 1 deletion tests/integration/teams/test_nba_integration.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
from flexmock import flexmock
from sportsreference import utils
from sportsreference.nba.constants import SEASON_PAGE_URL
from sportsreference.nba.teams import Teams
from sportsreference.nba.teams import Team, Teams


MONTH = 1
Expand Down Expand Up @@ -167,6 +167,12 @@ def test_nba_empty_page_returns_no_teams(self):

assert len(teams) == 0

def test_pulling_team_directly(self):
    """Request Detroit directly from ``Team`` and check every expected stat."""
    detroit = Team('DET')

    for attribute in self.results:
        expected = self.results[attribute]
        assert getattr(detroit, attribute) == expected


class TestNBAIntegrationInvalidDate:
@mock.patch('requests.get', side_effect=mock_pyquery)
Expand Down

0 comments on commit 85c23d7

Please sign in to comment.