Last data update: 2014.03.03
|
R: Pitching table
Pitching table
Description
Pitching table
Usage
data(Pitching)
Format
A data frame with 43330 observations on the following 30 variables.
playerID Player ID code
yearID Year
stint player's stint (order of appearances within a season)
teamID Team; a factor
lgID League; a factor with levels AA AL FL NL PL UA
W Wins
L Losses
G Games
GS Games Started
CG Complete Games
SHO Shutouts
SV Saves
IPouts Outs Pitched (innings pitched x 3)
H Hits
ER Earned Runs
HR Home Runs
BB Walks
SO Strikeouts
BAOpp Opponent's Batting Average
ERA Earned Run Average
IBB Intentional Walks
WP Wild Pitches
HBP Batters Hit By Pitch
BK Balks
BFP Batters faced by Pitcher
GF Games Finished
R Runs Allowed
SH Sacrifices by opposing batters
SF Sacrifice flies by opposing batters
GIDP Grounded into double plays by opposing batters
Source
Lahman, S. (2015) Lahman's Baseball Database, 1871-2014, 2015 version, http://baseball1.com/statistics/
Examples
# Pitching data
# plyr supplies mutate(), arrange(), desc(), ddply() and .() used below.
# library() stops with an error when plyr is not installed; require() would
# only warn and return FALSE, postponing the failure to the first plyr call.
library(plyr)
###################################
# cleanup, and add some other stats
###################################
# Restrict to AL and NL data, 1901+
# All data re SH, SF and GIDP are missing, so remove them. The columns are
# dropped by name rather than by position, so a change in the column layout
# of Pitching cannot silently remove the wrong variables.
# Intentional walks (IBB) not recorded until 1955
pitching <- subset(Pitching,
                   yearID >= 1901 & lgID %in% c("AL", "NL"),
                   select = -c(SH, SF, GIDP))
# Approximate BAOpp only where it is missing (most common remaining missing
# value); recorded values are kept as they are. At-bats are approximated as
# BFP - BB - HBP because the sacrifice columns are unavailable.
pitching$BAOpp <- with(pitching,
                       ifelse(is.na(BAOpp),
                              round(H / (BFP - BB - HBP), 3),
                              BAOpp))
# Compute WHIP (hits + walks per inning pitched -- lower is better).
# IPouts counts outs, so (H + BB) * 3 / IPouts converts to a per-inning rate.
# Compute KperBB (strikeouts per walk). From 1955 on, intentional walks are
# subtracted from the walk count; earlier seasons use all walks.
pitching <- mutate(pitching,
                   WHIP = round((H + BB) * 3 / IPouts, 2),
                   KperBB = round(ifelse(yearID >= 1955,
                                         SO / (BB - IBB),
                                         SO / BB), 2))
#####################
# some simple queries
#####################
# Toronto Blue Jays pitching staff in 1993, lowest ERA first
tor93 <- subset(pitching, teamID == "TOR" & yearID == 1993)
arrange(tor93, ERA)
# Every season line for Greg Maddux
subset(pitching, playerID %in% "maddugr01")
# Ten lowest ERAs since 1946 among pitchers with at least 600 outs recorded
# (i.e. starting pitchers post WWII)
postwar <- subset(pitching, IPouts >= 600 & yearID >= 1946)
head(arrange(postwar, ERA), n = 10)
# Ten best K/BB ratios from 1955 on among starters (intentional walks are
# excluded); the ratio is recomputed here without rounding
post55 <- subset(pitching, IPouts >= 600 & yearID >= 1955)
post55$KperBB <- with(post55, SO / (BB - IBB))
head(arrange(post55, desc(KperBB)), n = 10)
# Ten best K/BB ratios among relievers from 1950 on (min. 20 saves)
head(
  arrange(subset(pitching, SV >= 20 & yearID >= 1950), desc(KperBB)),
  n = 10
)
###############################################
# Winningest pitchers in each league each year:
###############################################
# Add name & throws information:
masterInfo <- Master[, c("playerID", "nameLast", "nameFirst", "throws")]
# merge() joins on the column names the two tables share; all.x = TRUE keeps
# every pitching row even when there is no matching row in masterInfo.
pitching <- merge(pitching, masterInfo, all.x = TRUE)
# For each year/league combination keep the row(s) with the most wins;
# ties yield more than one row per group.
wp <- ddply(pitching, .(yearID, lgID), subset, W == max(W),
            select = c("playerID", "teamID", "W", "throws"))
# Quadratic trend in year, plus league and throwing-hand effects
anova(lm(formula = W ~ yearID + I(yearID^2) + lgID + throws, data = wp))
# an eye-catching, but naive, specious graph
# library() fails immediately if ggplot2 is not installed; require() would
# only warn and let the ggplot() call below fail instead.
library(ggplot2)
# compare loess smooth with quadratic fit
# NOTE: ggplot2 >= 3.4.0 prefers `linewidth =` over `size =` for line geoms;
# `size =` is kept so the example still runs on older ggplot2 releases.
ggplot(wp, aes(x = yearID, y = W)) +
  geom_point(aes(colour = throws, shape = lgID), size = 2) +
  geom_smooth(method = "loess", size = 1.5, color = "blue") +
  geom_smooth(method = "lm", se = FALSE, color = "black",
              formula = y ~ poly(x, 2)) +
  ylab("Maximum Wins") + xlab("Year") +
  ggtitle("Why can't pitchers win 30+ games any more?")
Results
R version 3.3.1 (2016-06-21) -- "Bug in Your Hair"
Copyright (C) 2016 The R Foundation for Statistical Computing
Platform: x86_64-pc-linux-gnu (64-bit)
R is free software and comes with ABSOLUTELY NO WARRANTY.
You are welcome to redistribute it under certain conditions.
Type 'license()' or 'licence()' for distribution details.
R is a collaborative project with many contributors.
Type 'contributors()' for more information and
'citation()' on how to cite R or R packages in publications.
Type 'demo()' for some demos, 'help()' for on-line help, or
'help.start()' for an HTML browser interface to help.
Type 'q()' to quit R.
> library(Lahman)
> png(filename="/home/ddbj/snapshot/RGM3/R_CC/result/Lahman/Pitching.Rd_%03d_medium.png", width=480, height=480)
> ### Name: Pitching
> ### Title: Pitching table
> ### Aliases: Pitching
> ### Keywords: datasets
>
> ### ** Examples
>
> # Pitching data
>
> require(plyr)
Loading required package: plyr
>
> ###################################
> # cleanup, and add some other stats
> ###################################
>
> # Restrict to AL and NL data, 1901+
> # All data re SH, SF and GIDP are missing, so remove
> # Intentional walks (IBB) not recorded until 1955
> pitching <- subset(Pitching, yearID >= 1901 & lgID %in% c("AL", "NL"))[, -(28:30)]
>
> # Approximate missing BAOpp values (most common remaining missing value)
> pitching$BAOpp <- with(pitching, round(H/(BFP - BB - HBP), 3))
> # Compute WHIP (hits + walks per inning pitched -- lower is better)
> pitching <- mutate(pitching,
+ WHIP = round((H + BB) * 3/IPouts, 2),
+ KperBB = round(ifelse(yearID >= 1955,
+ SO/(BB - IBB), SO/BB), 2))
>
> #####################
> # some simple queries
> #####################
>
> # Team pitching statistics, Toronto Blue Jays, 1993
> tor93 <- subset(pitching, yearID == 1993 & teamID == "TOR")
> arrange(tor93, ERA)
playerID yearID stint teamID lgID W L G GS CG SHO SV IPouts H ER HR
1 dayleke01 1993 1 TOR AL 0 0 2 0 0 0 0 2 1 0 0
2 warddu01 1993 1 TOR AL 2 3 71 0 0 0 45 215 49 17 4
3 eichhma01 1993 1 TOR AL 3 1 54 0 0 0 0 218 76 22 3
4 coxda01 1993 1 TOR AL 7 6 44 0 0 0 2 251 73 29 8
5 castito02 1993 1 TOR AL 3 2 51 0 0 0 0 152 44 19 4
6 hentgpa01 1993 1 TOR AL 19 9 34 32 3 0 0 649 215 93 27
7 guzmaju01 1993 1 TOR AL 14 3 33 33 2 1 0 663 211 98 17
8 flenehu01 1993 1 TOR AL 0 0 6 0 0 0 0 20 7 3 0
9 leiteal01 1993 1 TOR AL 9 6 34 12 1 1 2 315 93 48 8
10 williwo02 1993 1 TOR AL 3 1 30 0 0 0 0 111 40 18 2
11 stewada01 1993 1 TOR AL 12 8 26 26 0 0 0 486 146 80 23
12 timlimi01 1993 1 TOR AL 4 2 54 0 0 0 1 167 63 29 7
13 stottto01 1993 1 TOR AL 11 12 30 28 1 1 0 530 204 95 11
14 browsc01 1993 1 TOR AL 1 1 6 3 0 0 0 54 19 12 2
15 morrija02 1993 1 TOR AL 7 12 27 27 4 1 0 458 189 105 18
16 lintodo01 1993 1 TOR AL 0 1 4 1 0 0 0 33 11 8 0
BB SO BAOpp ERA IBB WP HBP BK BFP GF R WHIP KperBB
1 4 2 0.333 0.00 0 0 0 0 7 0 2 7.50 0.50
2 25 97 0.191 2.13 2 7 1 0 282 70 17 1.03 4.22
3 22 47 0.268 2.72 7 2 3 0 309 16 26 1.35 3.13
4 29 84 0.229 3.12 5 5 0 0 348 13 31 1.22 3.50
5 22 28 0.233 3.38 5 1 0 0 211 10 19 1.30 1.65
6 74 122 0.254 3.87 0 11 7 1 926 0 103 1.34 1.65
7 110 194 0.248 3.99 2 26 3 1 963 0 107 1.45 1.80
8 4 2 0.269 4.05 1 1 0 0 30 1 3 1.65 0.67
9 56 66 0.236 4.11 2 2 4 2 454 4 52 1.42 1.22
10 22 24 0.268 4.38 3 2 1 1 172 9 18 1.68 1.26
11 72 96 0.239 4.44 0 4 4 1 687 0 86 1.35 1.33
12 27 49 0.279 4.69 3 1 1 0 254 27 32 1.62 2.04
13 69 98 0.286 4.84 5 7 3 1 786 0 107 1.55 1.53
14 10 7 0.264 6.00 1 0 1 0 83 1 15 1.61 0.78
15 65 103 0.298 6.19 2 14 3 1 702 0 116 1.66 1.63
16 9 4 0.244 6.55 0 0 1 0 55 0 8 1.82 0.44
>
> # Career pitching statistics, Greg Maddux
> subset(pitching, playerID == "maddugr01")
playerID yearID stint teamID lgID W L G GS CG SHO SV IPouts H ER
25671 maddugr01 1986 1 CHN NL 2 4 6 5 1 0 0 93 44 19
26128 maddugr01 1987 1 CHN NL 6 14 30 27 1 1 0 467 181 97
26586 maddugr01 1988 1 CHN NL 18 8 34 34 9 3 0 747 230 88
27071 maddugr01 1989 1 CHN NL 19 12 35 35 7 1 0 715 222 78
27575 maddugr01 1990 1 CHN NL 15 15 35 35 8 2 0 711 242 91
28099 maddugr01 1991 1 CHN NL 15 11 37 37 7 2 0 789 232 98
28571 maddugr01 1992 1 CHN NL 20 11 35 35 9 4 0 804 201 65
29088 maddugr01 1993 1 ATL NL 20 10 36 36 8 1 0 801 228 70
29604 maddugr01 1994 1 ATL NL 16 6 25 25 10 3 0 606 150 35
30156 maddugr01 1995 1 ATL NL 19 2 28 28 10 3 0 629 147 38
30750 maddugr01 1996 1 ATL NL 15 11 35 35 5 1 0 735 225 74
31357 maddugr01 1997 1 ATL NL 19 4 33 33 5 2 0 698 200 57
31940 maddugr01 1998 1 ATL NL 18 9 34 34 9 5 0 753 201 62
32568 maddugr01 1999 1 ATL NL 19 9 33 33 4 0 0 658 258 87
33219 maddugr01 2000 1 ATL NL 19 9 35 35 6 3 0 748 225 83
33897 maddugr01 2001 1 ATL NL 17 11 34 34 3 3 0 699 220 79
34558 maddugr01 2002 1 ATL NL 16 6 34 34 0 0 0 598 194 58
35231 maddugr01 2003 1 ATL NL 16 11 36 36 1 0 0 655 225 96
35937 maddugr01 2004 1 CHN NL 16 11 33 33 2 1 0 638 218 95
36601 maddugr01 2005 1 CHN NL 13 15 35 35 3 0 0 675 239 106
37264 maddugr01 2006 1 CHN NL 9 11 22 22 0 0 0 409 153 71
37265 maddugr01 2006 2 LAN NL 6 3 12 12 0 0 0 221 66 27
37968 maddugr01 2007 1 SDN NL 14 11 34 34 1 0 0 594 221 91
38683 maddugr01 2008 1 SDN NL 6 9 26 26 0 0 0 460 161 68
38684 maddugr01 2008 2 LAN NL 2 4 7 7 0 0 0 122 43 23
HR BB SO BAOpp ERA IBB WP HBP BK BFP GF R WHIP KperBB
25671 3 11 20 0.333 5.52 2 2 1 0 144 1 20 1.77 2.22
26128 17 74 101 0.291 5.61 13 4 4 7 701 2 111 1.64 1.66
26586 13 81 140 0.240 3.18 16 3 9 6 1047 0 97 1.25 2.15
27071 13 82 135 0.243 2.95 13 5 6 3 1002 0 90 1.28 1.96
27575 11 71 144 0.259 3.46 10 3 4 3 1011 0 116 1.32 2.36
28099 18 66 198 0.232 3.35 9 6 6 3 1070 0 113 1.13 3.47
28571 7 70 199 0.206 2.18 7 5 14 0 1061 0 68 1.01 3.16
29088 14 52 197 0.227 2.36 7 5 6 1 1064 0 85 1.05 4.38
29604 4 31 156 0.204 1.56 3 3 6 1 774 0 44 0.90 5.57
30156 8 23 181 0.194 1.63 3 1 4 0 785 0 39 0.81 9.05
30750 11 28 172 0.238 2.72 11 4 3 0 978 0 85 1.03 10.12
31357 9 20 177 0.231 2.20 6 0 6 0 893 0 58 0.95 12.64
31940 13 45 204 0.215 2.22 10 4 7 0 987 0 75 0.98 5.83
32568 16 37 136 0.287 3.57 8 1 4 0 940 0 103 1.34 4.69
33219 19 42 190 0.234 3.00 12 1 10 2 1012 0 91 1.07 6.33
33897 20 27 173 0.246 3.05 10 2 7 0 927 0 86 1.06 10.18
34558 14 45 118 0.252 2.62 7 1 4 0 820 0 67 1.20 3.11
35231 24 33 124 0.262 3.96 7 3 8 0 901 0 112 1.18 4.77
35937 35 33 151 0.263 4.02 4 2 9 0 872 0 103 1.18 5.21
36601 29 36 136 0.268 4.24 4 8 7 0 936 0 112 1.22 4.25
37264 14 23 81 0.279 4.69 3 0 0 0 572 0 78 1.29 4.05
37265 6 14 36 0.239 3.30 4 0 0 0 290 0 31 1.09 3.60
37968 14 25 104 0.277 4.14 3 5 6 0 830 0 92 1.24 4.73
38683 16 26 80 0.265 3.99 4 2 5 2 638 0 80 1.22 3.64
38684 5 4 18 0.267 5.09 1 0 1 0 166 0 25 1.16 6.00
>
> # Best ERAs for starting pitchers post WWII
> postwar <- subset(pitching, yearID >= 1946 & IPouts >= 600)
> head(arrange(postwar, ERA), 10)
playerID yearID stint teamID lgID W L G GS CG SHO SV IPouts H ER HR BB
1 gibsobo01 1968 1 SLN NL 22 9 34 34 28 13 0 914 198 38 11 62
2 goodedw01 1985 1 NYN NL 24 4 35 35 16 8 0 830 198 47 13 69
3 maddugr01 1994 1 ATL NL 16 6 25 25 10 3 0 606 150 35 4 31
4 tiantlu01 1968 1 CLE AL 21 9 34 32 19 9 0 775 152 46 16 73
5 maddugr01 1995 1 ATL NL 19 2 28 28 10 3 0 629 147 38 8 23
6 chancde01 1964 1 LAA AL 20 9 46 35 15 11 4 835 194 51 7 86
7 koufasa01 1966 1 LAN NL 27 9 41 41 27 5 0 969 241 62 19 77
8 koufasa01 1964 1 LAN NL 19 5 29 28 15 7 1 669 154 43 13 53
9 guidrro01 1978 1 NYA AL 25 3 35 35 16 9 0 821 187 53 13 72
10 martipe02 2000 1 BOS AL 18 6 29 29 7 4 0 651 128 42 17 32
SO BAOpp ERA IBB WP HBP BK BFP GF R WHIP KperBB
1 268 0.181 1.12 6 4 7 0 1161 0 49 0.85 4.79
2 268 0.199 1.53 4 6 2 2 1065 0 51 0.97 4.12
3 156 0.204 1.56 3 3 6 1 774 0 44 0.90 5.57
4 264 0.167 1.60 4 3 4 0 987 0 53 0.87 3.83
5 181 0.194 1.63 3 1 4 0 785 0 39 0.81 9.05
6 207 0.193 1.65 9 9 2 0 1093 7 56 1.01 2.69
7 317 0.201 1.73 4 7 0 0 1274 0 74 0.98 4.34
8 223 0.188 1.74 5 9 0 0 870 1 49 0.93 4.65
9 248 0.190 1.74 1 7 1 1 1057 0 61 0.95 3.49
10 284 0.166 1.74 0 1 14 0 817 0 44 0.74 8.88
>
> # Best K/BB ratios post-1955 among starters (excludes intentional walks)
> post55 <- subset(pitching, yearID >= 1955 & IPouts >= 600)
> post55 <- mutate(post55, KperBB = SO/(BB - IBB))
> head(arrange(post55, desc(KperBB)), 10)
playerID yearID stint teamID lgID W L G GS CG SHO SV IPouts H ER HR
1 maddugr01 1997 1 ATL NL 19 4 33 33 5 2 0 698 200 57 9
2 hugheph01 2014 1 MIN AL 16 10 32 32 1 0 0 629 221 82 16
3 maddugr01 2001 1 ATL NL 17 11 34 34 3 3 0 699 220 79 20
4 maddugr01 1996 1 ATL NL 15 11 35 35 5 1 0 735 225 74 11
5 schilcu01 2002 1 ARI NL 23 7 36 35 5 1 0 778 218 93 29
6 maddugr01 1995 1 ATL NL 19 2 28 28 10 3 0 629 147 38 8
7 martipe02 2000 1 BOS AL 18 6 29 29 7 4 0 651 128 42 17
8 martipe02 1999 1 BOS AL 23 4 31 29 5 1 0 640 160 49 9
9 sheetbe01 2004 1 MIL NL 12 14 34 34 5 0 0 711 201 71 25
10 jenkife01 1971 1 CHN NL 24 13 39 39 30 3 0 975 304 100 29
BB SO BAOpp ERA IBB WP HBP BK BFP GF R WHIP KperBB
1 20 177 0.231 2.20 6 0 6 0 893 0 58 0.95 12.642857
2 16 186 0.265 3.52 1 1 5 0 855 0 88 1.13 12.400000
3 27 173 0.246 3.05 10 2 7 0 927 0 86 1.06 10.176471
4 28 172 0.238 2.72 11 4 3 0 978 0 85 1.03 10.117647
5 33 316 0.222 3.23 1 6 3 0 1017 0 95 0.97 9.875000
6 23 181 0.194 1.63 3 1 4 0 785 0 39 0.81 9.050000
7 32 284 0.166 1.74 0 1 14 0 817 0 44 0.74 8.875000
8 37 313 0.203 2.07 1 6 9 0 835 1 56 0.92 8.694444
9 32 264 0.223 2.70 1 8 4 1 937 0 85 0.98 8.516129
10 37 263 0.242 2.77 6 3 5 4 1299 0 114 1.05 8.483871
>
> # Best K/BB ratios among relievers post-1950 (min. 20 saves)
> head(arrange(subset(pitching, yearID >= 1950 & SV >= 20), desc(KperBB)), 10)
playerID yearID stint teamID lgID W L G GS CG SHO SV IPouts H ER HR BB
1 eckerde01 1990 1 OAK AL 4 2 63 0 0 0 48 220 41 5 2 4
2 eckerde01 1992 1 OAK AL 7 1 69 0 0 0 51 240 62 17 5 11
3 eckerde01 1989 1 OAK AL 4 0 51 0 0 0 33 173 32 10 5 3
4 eckerde01 1991 1 OAK AL 5 4 67 0 0 0 43 228 60 25 11 9
5 ueharko01 2013 1 BOS AL 4 1 73 0 0 0 21 223 33 9 5 9
6 riverma01 2008 1 NYA AL 6 5 64 0 0 0 39 212 41 11 4 6
7 doolise01 2014 1 OAK AL 2 4 61 0 0 0 22 188 38 19 5 8
8 eckerde01 1996 1 SLN NL 0 6 63 0 0 0 30 180 65 22 8 6
9 hoffmtr01 2000 1 SDN NL 4 7 70 0 0 0 43 217 61 24 7 11
10 wagnebi02 2004 1 PHI NL 4 0 45 0 0 0 21 145 31 13 5 6
SO BAOpp ERA IBB WP HBP BK BFP GF R WHIP KperBB
1 73 0.159 0.61 1 0 0 0 262 61 9 0.61 24.33
2 93 0.209 1.91 6 0 1 0 309 65 17 0.91 18.60
3 55 0.158 1.56 0 0 1 0 206 46 10 0.61 18.33
4 87 0.208 2.96 3 1 1 0 299 59 26 0.91 14.50
5 101 0.129 1.09 2 1 1 0 265 40 10 0.57 14.43
6 77 0.163 1.40 0 1 2 0 259 60 11 0.67 12.83
7 89 0.167 2.73 1 0 0 0 236 40 19 0.73 12.71
8 49 0.270 3.30 2 0 4 0 251 53 26 1.18 12.25
9 85 0.218 2.99 4 4 0 0 291 59 29 1.00 12.14
10 59 0.178 2.42 1 1 2 0 182 38 16 0.77 11.80
>
> ###############################################
> # Winningest pitchers in each league each year:
> ###############################################
>
> # Add name & throws information:
> masterInfo <- Master[, c('playerID',
+ 'nameLast', 'nameFirst', 'throws')]
> pitching <- merge(pitching, masterInfo, all.x=TRUE)
>
> wp <- ddply(pitching, .(yearID, lgID), subset, W == max(W),
+ select = c("playerID", "teamID", "W", "throws"))
>
> anova(lm(formula = W ~ yearID + I(yearID^2) + lgID + throws, data = wp))
Analysis of Variance Table
Response: W
Df Sum Sq Mean Sq F value Pr(>F)
yearID 1 1911.58 1911.58 210.0022 < 2.2e-16 ***
I(yearID^2) 1 204.63 204.63 22.4807 3.304e-06 ***
lgID 1 28.68 28.68 3.1511 0.07690 .
throws 1 32.76 32.76 3.5984 0.05881 .
Residuals 296 2694.39 9.10
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
>
> # an eye-catching, but naive, specious graph
>
> require('ggplot2')
Loading required package: ggplot2
> # compare loess smooth with quadratic fit
> ggplot(wp, aes(x = yearID, y = W)) +
+ geom_point(aes(colour = throws, shape=lgID), size = 2) +
+ geom_smooth(method="loess", size=1.5, color="blue") +
+ geom_smooth(method = "lm", se=FALSE, color="black", formula = y ~ poly(x,2)) +
+ ylab("Maximum Wins") + xlab("Year") +
+ ggtitle("Why can't pitchers win 30+ games any more?")
>
>
>
>
>
>
>
> dev.off()
null device
1
>
|
|