The PitchPredict class is the main interface for making predictions in Python.
from pitchpredict import PitchPredict

PitchPredict(
enable_cache: bool = True,
cache_dir: str = ".pitchpredict_cache",
enable_logging: bool = True,
log_dir: str = ".pitchpredict_logs",
log_level_console: str = "INFO",
log_level_file: str = "INFO",
fuzzy_player_lookup: bool = True,
algorithms: dict[str, PitchPredictAlgorithm] | None = None,
)

| Parameter | Type | Default | Description |
|---|---|---|---|
| enable_cache | bool | True | Enable caching of Statcast data |
| cache_dir | str | ".pitchpredict_cache" | Directory for cache files |
| enable_logging | bool | True | Enable logging to console and file |
| log_dir | str | ".pitchpredict_logs" | Directory for log files |
| log_level_console | str | "INFO" | Log level for console output |
| log_level_file | str | "INFO" | Log level for file output |
| fuzzy_player_lookup | bool | True | Enable fuzzy matching for player names |
| algorithms | dict | None | Custom algorithm implementations |

Caching is enabled by default and stores Parquet datasets plus small metadata files in cache_dir. See Caching for layout and update behavior.
# Default configuration
client = PitchPredict()
# Custom configuration
client = PitchPredict(
enable_cache=True,
enable_logging=True,
log_level_console="DEBUG",
fuzzy_player_lookup=True
)

Resolve a player's MLBAM ID from their name.
async def get_player_id_from_name(
player_name: str,
fuzzy_lookup: bool | None = None,
) -> int

| Parameter | Type | Default | Description |
|---|---|---|---|
| player_name | str | Required | Player name in "First Last" format |
| fuzzy_lookup | bool \| None | None | Override fuzzy matching setting (uses client default if None) |

The player's MLBAM ID as an integer.
import asyncio
from pitchpredict import PitchPredict
async def main():
client = PitchPredict()
# Get player ID by name
judge_id = await client.get_player_id_from_name("Aaron Judge")
print(f"Aaron Judge's MLBAM ID: {judge_id}")
# 592450
# With explicit fuzzy matching
kershaw_id = await client.get_player_id_from_name("Kershaw", fuzzy_lookup=True)
print(f"Clayton Kershaw's MLBAM ID: {kershaw_id}")
asyncio.run(main())

Resolve player records from a name, returning up to `limit` candidates.
async def get_player_records_from_name(
player_name: str,
fuzzy_lookup: bool | None = None,
limit: int = 1,
) -> list[dict[str, Any]]

| Parameter | Type | Default | Description |
|---|---|---|---|
| player_name | str | Required | Player name to search for |
| fuzzy_lookup | bool \| None | None | Override fuzzy matching setting |
| limit | int | 1 | Maximum number of results to return |

A list of player record dictionaries, each containing fields from pybaseball.playerid_lookup:
| Field | Type | Description |
|---|---|---|
| name_first | str | First name |
| name_last | str | Last name |
| name_full | str | Full name |
| key_mlbam | int | MLBAM ID |
| mlb_played_first | int | First MLB season |
| mlb_played_last | int | Last MLB season |

Additional fields from pybaseball may be included (e.g., key_retro, key_bbref, key_fangraphs).
import asyncio
from pitchpredict import PitchPredict
async def main():
client = PitchPredict()
# Search with multiple results
results = await client.get_player_records_from_name("Smith", limit=5)
for player in results:
print(f"{player['name_full']}: {player['key_mlbam']}")
asyncio.run(main())

Resolve a player record from an MLBAM ID.
async def get_player_record_from_id(
mlbam_id: int,
) -> dict[str, Any]

| Parameter | Type | Description |
|---|---|---|
| mlbam_id | int | The player's MLBAM ID |

A player record dictionary with the same fields as get_player_records_from_name.
import asyncio
from pitchpredict import PitchPredict
async def main():
client = PitchPredict()
# Look up player by ID
record = await client.get_player_record_from_id(592450)
print(f"Name: {record['name_full']}")
print(f"Career: {record['mlb_played_first']}-{record['mlb_played_last']}")
asyncio.run(main())

Predict the pitcher's next pitch given the game context.
async def predict_pitcher(
pitcher_id: int,
batter_id: int,
prev_pitches: list[Pitch] | None = None,
algorithm: str = "similarity",
sample_size: int = 1,
pitcher_age: int | None = None,
pitcher_throws: Literal["L", "R"] | None = None,
batter_age: int | None = None,
batter_hits: Literal["L", "R"] | None = None,
count_balls: int | None = None,
count_strikes: int | None = None,
outs: int | None = None,
bases_state: int | None = None,
score_bat: int | None = None,
score_fld: int | None = None,
inning: int | None = None,
pitch_number: int | None = None,
number_through_order: int | None = None,
game_date: str | None = None,
fielder_2_id: int | None = None,
fielder_3_id: int | None = None,
fielder_4_id: int | None = None,
fielder_5_id: int | None = None,
fielder_6_id: int | None = None,
fielder_7_id: int | None = None,
fielder_8_id: int | None = None,
fielder_9_id: int | None = None,
batter_days_since_prev_game: int | None = None,
pitcher_days_since_prev_game: int | None = None,
strike_zone_top: float | None = None,
strike_zone_bottom: float | None = None,
) -> PredictPitcherResponse

| Parameter | Type | Description |
|---|---|---|
| pitcher_id | int | MLBAM pitcher ID |
| batter_id | int | MLBAM batter ID |
| prev_pitches | list[Pitch] \| None | Required for xLSTM (use [] for cold-start) |
| algorithm | str | Algorithm to use: "similarity" or "xlstm" |
| sample_size | int | Number of pitches to sample |
| game_date | str \| None | Game date in "YYYY-MM-DD" format |

Additional optional context fields include count, bases, score, inning, fielders, rest days, and strike zone bounds (see PredictPitcherRequest for the full list).
xLSTM requirements: When algorithm="xlstm", prev_pitches is required and every pitch in the list must include a pa_id. Weights are loaded lazily on first use; set PITCHPREDICT_XLSTM_PATH to a local checkpoint directory (model.safetensors + config.json) if you want to bypass downloads. xLSTM is only available for pitcher predictions.
A PredictPitcherResponse with the following fields:
| Key | Type | Description |
|---|---|---|
| basic_pitch_data | dict | Pitch type probabilities, speed/location means |
| detailed_pitch_data | dict | Fastball vs offspeed breakdown, percentiles |
| basic_outcome_data | dict | Outcome probabilities, swing rates |
| detailed_outcome_data | dict | Contact quality metrics |
| pitches | list | Sampled pitches with detailed attributes |
| prediction_metadata | dict | Timing and sample information |

The return value is a PredictPitcherResponse model; use attribute access or model_dump() for a dict.
import asyncio
from pitchpredict import PitchPredict
async def main():
client = PitchPredict()
# Resolve player IDs from names
kershaw_id = await client.get_player_id_from_name("Clayton Kershaw")
judge_id = await client.get_player_id_from_name("Aaron Judge")
result = await client.predict_pitcher(
pitcher_id=kershaw_id,
batter_id=judge_id,
count_balls=1,
count_strikes=2,
score_bat=2,
score_fld=1,
game_date="2024-06-15",
algorithm="similarity"
)
# Pitch type probabilities
print(result.basic_pitch_data["pitch_type_probs"])
# {'FF': 0.42, 'SL': 0.31, 'CU': 0.18, 'CH': 0.09}
# Average pitch speed
print(result.basic_pitch_data["pitch_speed_mean"])
# 92.5
# Outcome probabilities
print(result.basic_outcome_data["outcome_probs"])
# {'strike': 0.45, 'ball': 0.35, 'contact': 0.20}
asyncio.run(main())

Predict the batter's outcome given a specific pitch.
async def predict_batter(
pitcher_id: int,
batter_id: int,
pitch_type: str,
pitch_speed: float,
pitch_x: float,
pitch_z: float,
prev_pitches: list[Pitch] | None = None,
algorithm: str = "similarity",
sample_size: int = 1,
pitcher_age: int | None = None,
pitcher_throws: Literal["L", "R"] | None = None,
batter_age: int | None = None,
batter_hits: Literal["L", "R"] | None = None,
count_balls: int | None = None,
count_strikes: int | None = None,
outs: int | None = None,
bases_state: int | None = None,
score_bat: int | None = None,
score_fld: int | None = None,
inning: int | None = None,
pitch_number: int | None = None,
number_through_order: int | None = None,
game_date: str | None = None,
fielder_2_id: int | None = None,
fielder_3_id: int | None = None,
fielder_4_id: int | None = None,
fielder_5_id: int | None = None,
fielder_6_id: int | None = None,
fielder_7_id: int | None = None,
fielder_8_id: int | None = None,
fielder_9_id: int | None = None,
batter_days_since_prev_game: int | None = None,
pitcher_days_since_prev_game: int | None = None,
strike_zone_top: float | None = None,
strike_zone_bottom: float | None = None,
) -> PredictBatterResponse

| Parameter | Type | Description |
|---|---|---|
| pitcher_id | int | MLBAM pitcher ID |
| batter_id | int | MLBAM batter ID |
| pitch_type | str | Pitch type code (e.g., "FF", "SL") |
| pitch_speed | float | Pitch speed in mph |
| pitch_x | float | Horizontal location at plate (feet from center) |
| pitch_z | float | Vertical location at plate (feet from ground) |
| prev_pitches | list[Pitch] \| None | Reserved for xLSTM (not used for batter predictions yet) |
| algorithm | str | Algorithm to use: "similarity" |
| sample_size | int | Number of pitches to sample |
| game_date | str \| None | Game date in "YYYY-MM-DD" format |

Additional optional context fields match predict_pitcher (count, bases, score, inning, fielders, rest days, strike zone bounds).
A PredictBatterResponse with the following fields:
| Key | Type | Description |
|---|---|---|
| basic_outcome_data | dict | Outcome probabilities, swing rates |
| detailed_outcome_data | dict | Contact quality, expected stats |
| prediction_metadata | dict | Timing and sample information |

import asyncio
from pitchpredict import PitchPredict
async def main():
client = PitchPredict()
result = await client.predict_batter(
pitcher_id=477132,
batter_id=592450,
game_date="2024-06-15",
pitch_type="FF",
pitch_speed=95.0,
pitch_x=0.5,
pitch_z=2.5,
count_balls=1,
count_strikes=2,
score_bat=2,
score_fld=1,
algorithm="similarity"
)
# Swing probability
print(result.basic_outcome_data["swing_probability"])
# Contact event probabilities
print(result.basic_outcome_data["contact_event_probs"])
asyncio.run(main())

Predict batted ball outcome probabilities given exit velocity, launch angle, and optional game context.
async def predict_batted_ball(
launch_speed: float,
launch_angle: float,
algorithm: str,
spray_angle: float | None = None,
bb_type: str | None = None,
outs: int | None = None,
bases_state: int | None = None,
batter_id: int | None = None,
game_date: str | None = None,
) -> dict[str, Any]

| Parameter | Type | Required | Description |
|---|---|---|---|
| launch_speed | float | Yes | Exit velocity in mph |
| launch_angle | float | Yes | Launch angle in degrees (-90 to 90) |
| algorithm | str | Yes | Algorithm to use: "similarity" |
| spray_angle | float | No | Horizontal direction (-45 to 45, 0 = center field) |
| bb_type | str | No | Batted ball type: "ground_ball", "line_drive", "fly_ball", "popup" |
| outs | int | No | Current outs (0-2) |
| bases_state | int | No | Bases occupied bitmask (1=1B, 2=2B, 4=3B) |
| batter_id | int | No | MLBAM batter ID |
| game_date | str | No | Game date in "YYYY-MM-DD" format |

A dictionary containing:
| Key | Type | Description |
|---|---|---|
| basic_outcome_data | dict | Outcome probabilities, hit probability, xBA, inferred batted ball type |
| detailed_outcome_data | dict | Sample statistics, expected stats (xBA, xSLG, xwOBA) |
| prediction_metadata | dict | Sample size and similarity weights |

import asyncio
from pitchpredict import PitchPredict
async def main():
client = PitchPredict()
# Basic prediction with just exit velocity and launch angle
result = await client.predict_batted_ball(
launch_speed=95.0,
launch_angle=18.0,
algorithm="similarity"
)
# Outcome probabilities
print(result["basic_outcome_data"]["outcome_probs"])
# {'single': 0.18, 'double': 0.08, 'triple': 0.01, 'home_run': 0.05, ...}
# Hit probability
print(result["basic_outcome_data"]["hit_probability"])
# 0.32
# Expected batting average
print(result["basic_outcome_data"]["xba"])
# 0.285
# With game context for context-aware filtering
result = await client.predict_batted_ball(
launch_speed=102.0,
launch_angle=25.0,
algorithm="similarity",
outs=1,
bases_state=5 # runners on 1B and 3B
)
# sac_fly will be included (outs < 2 and runner on 3B)
# double_play will be included (runners on base)
print(result["basic_outcome_data"]["outcome_probs"])
asyncio.run(main())

{
"pitch_type_probs": {
"FF": 0.45, # Four-seam fastball
"SL": 0.30, # Slider
"CU": 0.15, # Curveball
"CH": 0.10 # Changeup
},
"pitch_speed_mean": 92.5, # Average speed (mph)
"pitch_speed_std": 4.2, # Speed standard deviation
"pitch_x_mean": 0.12, # Average horizontal location (feet)
"pitch_x_std": 0.85, # Horizontal location std dev
"pitch_z_mean": 2.45, # Average vertical location (feet)
"pitch_z_std": 0.72 # Vertical location std dev
}

{
"pitch_prob_fastball": 0.55, # Probability of fastball
"pitch_prob_offspeed": 0.45, # Probability of offspeed
"pitch_data_fastballs": {
"pitch_type_probs": {"FF": 0.70, "SI": 0.20, "FC": 0.10},
"pitch_speed_mean": 95.2,
"pitch_speed_p05": 92.0, # 5th percentile
"pitch_speed_p25": 94.0, # 25th percentile
"pitch_speed_p50": 95.5, # Median
"pitch_speed_p75": 96.5, # 75th percentile
"pitch_speed_p95": 98.0, # 95th percentile
# ... location percentiles
},
"pitch_data_offspeed": {
# Similar structure for offspeed pitches
},
"pitch_data_overall": {
# Combined statistics
}
}

{
"outcome_probs": {
"strike": 0.45,
"ball": 0.35,
"contact": 0.20
},
"swing_probability": 0.55,
"swing_event_probs": {
"swinging_strike": 0.30,
"contact": 0.70
},
"contact_probability": 0.20,
"contact_event_probs": {
"single": 0.18,
"double": 0.05,
"home_run": 0.03,
"field_out": 0.60,
# ...
}
}

{
"swing_data": {
"bat_speed_mean": 72.5, # mph
"bat_speed_std": 3.2,
"swing_length_mean": 7.2, # feet
"swing_length_std": 0.8,
# ... percentiles
},
"contact_data": {
"bb_type_probs": { # Batted ball types
"fly_ball": 0.35,
"ground_ball": 0.40,
"line_drive": 0.25
},
"BA": 0.285, # Batting average on contact
"SLG": 0.520, # Slugging on contact
"wOBA": 0.380, # wOBA on contact
"exit_velocity_mean": 92.0,
"launch_angle_mean": 12.5,
"xBA": 0.290, # Expected BA
"xSLG": 0.510, # Expected SLG
"xwOBA": 0.375 # Expected wOBA
}
}

{
"start_time": "2024-06-15T10:30:00",
"end_time": "2024-06-15T10:30:02",
"duration": 2.5, # seconds
"n_pitches_total": 5000, # Total pitches analyzed
"n_pitches_sampled": 250, # Pitches in sample
"sample_pctg": 0.05 # Sample percentage
}

{
"outcome_probs": {
"single": 0.18,
"double": 0.08,
"triple": 0.01,
"home_run": 0.05,
"groundout": 0.25,
"flyout": 0.20,
"lineout": 0.08,
"popout": 0.02,
"sac_fly": 0.03, # Only if outs < 2 AND runner on 3B
"double_play": 0.04, # Only if runners on base
"field_error": 0.02,
"force_out": 0.04 # Only if force play possible
},
"hit_probability": 0.32, # Probability of a hit
"xba": 0.285, # Expected batting average
"bb_type_inferred": "line_drive" # Inferred from launch angle
}

{
"sample_launch_speed_mean": 98.5, # Mean EV of similar batted balls
"sample_launch_angle_mean": 15.2, # Mean LA of similar batted balls
"expected_stats": {
"xBA": 0.285, # Expected batting average
"xSLG": 0.520, # Expected slugging
"xwOBA": 0.380 # Expected wOBA
}
}

{
"n_batted_balls_sampled": 750, # Number of similar batted balls
"sample_pctg": 0.05, # Sample percentage
"similarity_weights": {
"launch_speed": 0.45, # Exit velocity weight
"launch_angle": 0.40, # Launch angle weight
"spray_angle": 0.05, # Spray angle weight
"bases_state": 0.05, # Bases state weight
"outs": 0.03, # Outs weight
"date": 0.02 # Date recency weight
}
}

PitchPredict raises HTTPException for errors:
from fastapi import HTTPException
try:
result = await client.predict_pitcher(...)
except HTTPException as e:
if e.status_code == 400:
print(f"Bad request: {e.detail}")
elif e.status_code == 500:
print(f"Server error: {e.detail}")

Common errors:

| Status | Description |
|---|---|
| 400 | Invalid algorithm name |
| 500 | Player not found, data fetch error |