Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fixes and formatting #3

Merged
merged 4 commits into from
Jul 19, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/python-package.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ jobs:
strategy:
fail-fast: false
matrix:
python-version: ["3.9", "3.10", "3.11"]
python-version: ["3.10", "3.11", "3.12"]

steps:
- uses: actions/checkout@v4
Expand Down
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -113,4 +113,5 @@ venv.bak/

# Custom
NOTES
test.*
test.*
*.lock
14 changes: 9 additions & 5 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,15 +15,18 @@ git clone https://github.com/gdifiore/pyball.git

cd pyball

pip install . OR poetry install
python -m venv .venv

(activate .venv)

poetry install

poetry run post-install
```

## Docs

Read the [docs](https://gdifiore.github.io/pyball/docs/pyball/index.html) for function descriptions.

For examples, look at my [MLBResearch Repo](https://github.com/gdifiore/MLBResearch/blob/main/Parse_BBRef_Table/bbref_table.ipynb) where I test most of the functions.

## Comments and Suggestions
Leave any comments or suggestions in [an issue](https://github.com/SummitCode/pyball/issues/new) or directly make [a pull request](https://github.com/SummitCode/pyball/compare) adding code.

Expand All @@ -32,4 +35,5 @@ Leave any comments or suggestions in [an issue](https://github.com/SummitCode/py
`pyball` is licensed under the [MIT license](https://github.com/SummitCode/pyball/blob/master/LICENSE)

## TODO
- update documentation
- update documentation
- add checks to URLs passed into functions (e.g. that a team URL is passed to the team parsing functions)
7 changes: 6 additions & 1 deletion pyball/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,4 +5,9 @@
from .pitching_stats import *
from .team_batting_stats import *
from .team_pitching_stats import *
from .savant import *
from .savant import *

import subprocess

def post_install():
    """
    Run the ``playwright install`` command in a child process.

    The command is executed with ``check=True``, so a non-zero exit status
    raises ``subprocess.CalledProcessError`` instead of being ignored.

    NOTE(review): presumably this is the target of the ``post-install``
    script mentioned in the README and downloads the browsers Playwright
    needs -- confirm against the project configuration.
    """
    command = ["playwright", "install"]
    subprocess.run(command, check=True)
11 changes: 7 additions & 4 deletions pyball/batting_stats.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,10 @@

import pandas as pd

from pyball.utils import readURL
from pyball.utils import read_url, is_player_url


def findBattingTable(soup):
def _find_batting_table(soup):
"""
Function to find the batting stats table (Baseball-Reference) in the soup

Expand Down Expand Up @@ -44,8 +44,11 @@ def batting_stats(url):
pandas dataframe
Contains the batting stats for the player
"""
soup = readURL(url)
table = findBattingTable(soup)
if not is_player_url(url):
return None

soup = read_url(url)
table = _find_batting_table(soup)

df = pd.read_html(str(table))[0]

Expand Down
11 changes: 7 additions & 4 deletions pyball/pitching_stats.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,10 @@

import pandas as pd

from pyball.utils import readURL
from pyball.utils import read_url, is_player_url


def findPitchingTable(soup):
def _find_pitching_table(soup):
"""
Function to find the pitching stats table (Baseball-Reference) in the soup

Expand Down Expand Up @@ -44,8 +44,11 @@ def pitching_stats(url):
pandas dataframe
Contains the pitching stats for the player
"""
soup = readURL(url)
table = findPitchingTable(soup)
if not is_player_url(url):
return None

soup = read_url(url)
table = _find_pitching_table(soup)

df = pd.read_html(str(table))[0]

Expand Down
4 changes: 2 additions & 2 deletions pyball/playerid_lookup.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ def download_file(file_number):
io.StringIO(response.content.decode("utf-8")),
dtype={"key_sr_nfl": object, "key_sr_nba": object, "key_sr_nhl": object},
)

@lru_cache(maxsize=1)
def get_lookup_table():
"""
Expand Down Expand Up @@ -100,4 +100,4 @@ def playerid_lookup(last, first=None):
query_string += f" and name_first == '{first}'" if first else ""
results = table.query(query_string)

return results.reset_index(drop=True)
return results.reset_index(drop=True)
55 changes: 34 additions & 21 deletions pyball/savant.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,10 @@

import pandas as pd

from pyball.utils import readURL
from pyball.utils import read_url


def findPercentilesTable(soup):
def _find_percentiles_table(soup):
"""
Function to find the stat percentiles table (Baseball Savant) in the soup

Expand All @@ -30,7 +30,7 @@ def findPercentilesTable(soup):
return table


def savantPercentileStats(url):
def savant_percentile_stats(url):
"""
Function to return the (Baseball Savant) percentile stats for a player as a pandas dataframe

Expand All @@ -44,15 +44,15 @@ def savantPercentileStats(url):
pandas dataframe
Contains the percentile stats for the player
"""
soup = readURL(url)
table = findPercentilesTable(soup)
soup = read_url(url)
table = _find_percentiles_table(soup)

df = pd.read_html(str(table))[0]

return df.dropna(how="all")


def findStatcastPitchingStatsTable(soup):
def _find_statcast_pitching_stats_table(soup):
"""
Function to find the statcast pitching stats table (Baseball Savant) in the soup

Expand All @@ -67,13 +67,16 @@ def findStatcastPitchingStatsTable(soup):
Contains the html of the statcast pitching stats table
"""
div = soup.find("div", id="statcast_stats_pitching")
if div is None:
print("Not a pitcher page")
return None
# get table inside div
table = div.find("table")

return table


def savantPitchingStatcastStats(url):
def savant_pitching_statcast_stats(url):
"""
Function to return the (Baseball Savant) pitching stats for a player as a pandas dataframe

Expand All @@ -87,16 +90,19 @@ def savantPitchingStatcastStats(url):
pandas dataframe
Contains the savant pitching stats for the player
"""
soup = readURL(url)
table = findStatcastPitchingStatsTable(soup)
soup = read_url(url)
table = _find_statcast_pitching_stats_table(soup)

if table is None:
return None

df = pd.read_html(str(table))[0]

# drop a row of all NA and drop last row of MLB average
return df.dropna(how="all").drop(df.index[-1])


def findStatcastBattingStatsTable(soup):
def _find_statcast_batting_stats_table(soup):
"""
Function to find the statcast batting stats table (Baseball Savant) in the soup

Expand All @@ -112,12 +118,16 @@ def findStatcastBattingStatsTable(soup):
"""
# Find div with id 'statcast_glance_batter' and get table inside
div = soup.find("div", id="statcast_glance_batter")
if div is None:
print("Not a pitcher page")
return None

table = div.find("table")

return table


def savantBattingStatcastStats(url):
def savant_batting_statcast_stats(url):
"""
Function to return the (Baseball Savant) batting stats for a player as a pandas dataframe

Expand All @@ -131,16 +141,19 @@ def savantBattingStatcastStats(url):
pandas dataframe
Contains the savant batting stats for the player
"""
soup = readURL(url)
table = findStatcastBattingStatsTable(soup)
soup = read_url(url)
table = _find_statcast_batting_stats_table(soup)

if table is None:
return None

df = pd.read_html(str(table))[0]

# drop a row of all NA and drop last row of MLB average
return df.dropna(how="all").drop(df.index[-1])


def findBattedBallProfileTable(soup):
def _find_batted_ball_profile_table(soup):
"""
Function to find the batted ball profile table (Baseball Savant) in the soup

Expand All @@ -159,7 +172,7 @@ def findBattedBallProfileTable(soup):
return table


def savantBattedBallProfile(url):
def savant_batted_ball_profile(url):
"""
Function to return the (Baseball Savant) batted ball profile for a player as a pandas dataframe

Expand All @@ -173,15 +186,15 @@ def savantBattedBallProfile(url):
pandas dataframe
Contains the batted ball profile for the player
"""
soup = readURL(url)
table = findBattedBallProfileTable(soup)
soup = read_url(url)
table = _find_batted_ball_profile_table(soup)

df = pd.read_html(str(table))[0]

return df.dropna(how="all")


def findPitchTrackingTable(soup):
def _find_pitch_tracking_table(soup):
"""
Function to find the pitch tracking table (Baseball Savant) in the soup

Expand All @@ -200,7 +213,7 @@ def findPitchTrackingTable(soup):
return table


def savantPitchTracking(url):
def savant_pitch_tracking(url):
"""
Function returns the (Baseball Savant) pitch-specific results for a player as a pandas dataframe

Expand All @@ -214,8 +227,8 @@ def savantPitchTracking(url):
pandas dataframe
Contains the pitch-specific results for the player
"""
soup = readURL(url)
table = findPitchTrackingTable(soup)
soup = read_url(url)
table = _find_pitch_tracking_table(soup)

df = pd.read_html(str(table))[0]

Expand Down
11 changes: 7 additions & 4 deletions pyball/team_batting_stats.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,10 @@

import pandas as pd

from pyball.utils import readURL
from pyball.utils import read_url, is_team_url


def findTeamBattingTable(soup):
def _find_team_batting_table(soup):
"""
Function to find the team batting stats table (Baseball-Reference) in the soup

Expand Down Expand Up @@ -44,8 +44,11 @@ def team_batting_stats(url):
pandas dataframe
containing the team batting stats for the team
"""
soup = readURL(url)
table = findTeamBattingTable(soup)
if not is_team_url(url):
return None

soup = read_url(url)
table = _find_team_batting_table(soup)

df = pd.read_html(str(table))[0]

Expand Down
11 changes: 7 additions & 4 deletions pyball/team_pitching_stats.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,10 @@

import pandas as pd

from pyball.utils import readURL
from pyball.utils import read_url, is_team_url


def findTeamPitchingTable(soup):
def _find_team_pitching_table(soup):
"""
Function to find the team pitching stats table (Baseball-Reference) in the soup

Expand Down Expand Up @@ -45,8 +45,11 @@ def team_pitching_stats(url):
pandas dataframe
containing the team batting stats for the team
"""
soup = readURL(url)
table = findTeamPitchingTable(soup)
if not is_team_url(url):
return None

soup = read_url(url)
table = _find_team_pitching_table(soup)

df = pd.read_html(str(table))[0]

Expand Down
Loading