Last data update: 2014.03.03

R: Salaries table
Salaries {Lahman} R Documentation

Salaries table

Description

Player salary data.

Usage

data(Salaries)

Format

A data frame with 23956 observations on the following 5 variables.

yearID

Year

teamID

Team; a factor

lgID

League; a factor

playerID

Player ID code

salary

Salary

Details

There is no real coverage of players' salaries until 1985.

Source

Lahman, S. (2015) Lahman's Baseball Database, 1871-2014, 2015 version, http://baseball1.com/statistics/

Examples

# Which seasons does the salary table span?
summary(Salaries[["yearID"]])
# Number of salary records in each season
table(Salaries[["yearID"]])

# Team salary data

# library() stops with an error if plyr is not installed; require() would
# only warn and return FALSE, deferring the failure to the first ddply() call.
library(plyr)

# Total team salaries by league, team and year.
# salary is stored as an integer column; converting to numeric before summing
# guards against integer overflow (sum() of integers gives NA on overflow).
teamSalaries <- ddply(Salaries, .(lgID, teamID, yearID), summarise,
                      Salary = sum(as.numeric(salary)))

# Arrange in decreasing order of total salary within each year and league
teamSalaries <- ddply(teamSalaries, .(yearID, lgID), arrange, desc(Salary))

#######################################
# Highest paid players each year:
# for every season keep the row(s) whose salary equals that season's maximum
maxSal <- ddply(Salaries, .(yearID), subset, salary == max(salary))
# Look up each player with playerInfo() and keep columns 2:3 of the result
# (nameFirst and nameLast). The result is not stored under the name `names`,
# so that base::names() is not masked by a data object in the workspace.
playerNames <- apply(t(sapply(maxSal$playerID, playerInfo))[,2:3], 2, paste)
maxSal <- cbind(maxSal, playerNames)
maxSal
# top salary per season, in units of $100,000
plot(salary/100000 ~ yearID, data=maxSal, type='b', ylab='Salary (100,000$)')
# see the whole distribution
boxplot(salary/100000 ~ yearID, data=Salaries, col="lightblue")

# Attach salary to the Batting data.
# Rows are matched on player, season and team; all.x = TRUE keeps every
# Batting row, so rows without a salary record get salary = NA.
batting <- merge(
  x = Batting,
  y = Salaries[, c("playerID", "yearID", "teamID", "salary")],
  by = c("playerID", "yearID", "teamID"),
  all.x = TRUE
)
str(batting)

#######################################
# Average salaries by teams, over years
#######################################

# library() fails immediately if the package is missing; require() would only
# warn and return FALSE.
library(plyr)
# Mean salary per year, team and league, in units of $100,000
avesal <- ddply(Salaries, .(yearID, teamID, lgID), summarise,
                salary = mean(salary) / 100000)

# remove infrequent teams: keep only teams with at least 15 rows in avesal
tcount <- table(avesal$teamID)
frequentTeams <- names(tcount)[tcount >= 15]
# No drop = TRUE here: in subset() that argument does not drop unused factor
# levels; it only collapses a one-row/one-column result to a lower dimension.
avesal <- subset(avesal, teamID %in% frequentTeams)
# subset() keeps the unused teamID levels, so rebuild the factor with only the
# retained teams; otherwise xyplot() would draw an empty panel for each one.
avesal$teamID <- factor(avesal$teamID, levels = frequentTeams)

library(lattice)
# one panel per team: average salary against season
xyplot(salary ~ yearID | teamID, data = avesal, ylab = "Salary (100,000$)")

Results


R version 3.3.1 (2016-06-21) -- "Bug in Your Hair"
Copyright (C) 2016 The R Foundation for Statistical Computing
Platform: x86_64-pc-linux-gnu (64-bit)

R is free software and comes with ABSOLUTELY NO WARRANTY.
You are welcome to redistribute it under certain conditions.
Type 'license()' or 'licence()' for distribution details.

R is a collaborative project with many contributors.
Type 'contributors()' for more information and
'citation()' on how to cite R or R packages in publications.

Type 'demo()' for some demos, 'help()' for on-line help, or
'help.start()' for an HTML browser interface to help.
Type 'q()' to quit R.

> library(Lahman)
> png(filename="/home/ddbj/snapshot/RGM3/R_CC/result/Lahman/Salaries.Rd_%03d_medium.png", width=480, height=480)
> ### Name: Salaries
> ### Title: Salaries table
> ### Aliases: Salaries
> ### Keywords: datasets
> 
> ### ** Examples
> 
> # what years are included?
> summary(Salaries$yearID)
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
   1985    1993    2000    2000    2007    2014 
> # how many players included each year?
> table(Salaries$yearID)

1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 
 550  738  627  663  711  867  685  769  923  884  986  931  925  998 1006  836 
2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 
 860  846  827  831  831  819  842  856  813  830  839  848  815  802 
> 
> # Team salary data
> 
> require(plyr)
Loading required package: plyr
> 
> # Total team salaries by league, team and year
> teamSalaries <- ddply(Salaries, .(lgID, teamID, yearID), summarise,
+                        Salary = sum(as.numeric(salary)))
> 
> # Arrange in decreasing order within year and league:
> teamSalaries <- ddply(teamSalaries, .(yearID, lgID), arrange, desc(Salary))
> 
> #######################################
> # Highest paid players each year:
> maxSal <- ddply(Salaries, .(yearID), subset, salary == max(salary))
> names <- apply(t(sapply(maxSal$playerID, playerInfo))[,2:3], 2, paste)
> maxSal <- cbind(maxSal, names)
> maxSal
   yearID teamID lgID  playerID   salary nameFirst   nameLast
1    1985    PHI   NL schmimi01  2130300      Mike    Schmidt
2    1986    NYN   NL fostege01  2800000    George     Foster
3    1987    PHI   NL schmimi01  2127333      Mike    Schmidt
4    1988    SLN   NL smithoz01  2340000     Ozzie      Smith
5    1989    LAN   NL hershor01  2766667      Orel  Hershiser
6    1990    ML4   AL yountro01  3200000     Robin      Yount
7    1991    LAN   NL strawda01  3800000    Darryl Strawberry
8    1992    NYN   NL bonilbo01  6100000     Bobby    Bonilla
9    1993    NYN   NL bonilbo01  6200000     Bobby    Bonilla
10   1994    NYN   NL bonilbo01  6300000     Bobby    Bonilla
11   1995    DET   AL fieldce01  9237500     Cecil    Fielder
12   1996    DET   AL fieldce01  9237500     Cecil    Fielder
13   1997    CHA   AL belleal01 10000000    Albert      Belle
14   1998    FLO   NL sheffga01 14936667      Gary  Sheffield
15   1999    BAL   AL belleal01 11949794    Albert      Belle
16   2000    LAN   NL brownke01 15714286     Kevin      Brown
17   2001    TEX   AL rodrial01 22000000      Alex  Rodriguez
18   2002    TEX   AL rodrial01 22000000      Alex  Rodriguez
19   2003    TEX   AL rodrial01 22000000      Alex  Rodriguez
20   2004    BOS   AL ramirma02 22500000     Manny    Ramirez
21   2005    NYA   AL rodrial01 26000000      Alex  Rodriguez
22   2006    NYA   AL rodrial01 21680727      Alex  Rodriguez
23   2007    NYA   AL giambja01 23428571     Jason     Giambi
24   2008    NYA   AL rodrial01 28000000      Alex  Rodriguez
25   2009    NYA   AL rodrial01 33000000      Alex  Rodriguez
26   2010    NYA   AL rodrial01 33000000      Alex  Rodriguez
27   2011    NYA   AL rodrial01 32000000      Alex  Rodriguez
28   2012    NYA   AL rodrial01 30000000      Alex  Rodriguez
29   2013    NYA   AL rodrial01 29000000      Alex  Rodriguez
30   2014    LAN   NL greinza01 26000000      Zack    Greinke
> plot(salary/100000 ~ yearID, data=maxSal, type='b', ylab='Salary (100,000$)')
> # see the whole distribution
> boxplot(salary/100000 ~ yearID, data=Salaries, col="lightblue")
> 
> # add salary to Batting data
> batting <- merge(Batting, 
+                  Salaries[,c("playerID", "yearID", "teamID", "salary")], 
+                  by=c("playerID", "yearID", "teamID"), all.x=TRUE)
> str(batting)
'data.frame':	99846 obs. of  23 variables:
 $ playerID: chr  "aardsda01" "aardsda01" "aardsda01" "aardsda01" ...
 $ yearID  : int  2004 2006 2007 2008 2009 2010 2012 2013 1954 1955 ...
 $ teamID  : Factor w/ 149 levels "ALT","ANA","ARI",..: 117 35 33 16 116 116 93 94 80 80 ...
 $ stint   : int  1 1 1 1 1 1 1 1 1 1 ...
 $ lgID    : Factor w/ 7 levels "AA","AL","FL",..: 5 5 2 2 2 2 2 5 5 5 ...
 $ G       : int  11 45 25 47 73 53 1 43 122 153 ...
 $ AB      : int  0 2 0 1 0 0 0 0 468 602 ...
 $ R       : int  0 0 0 0 0 0 0 0 58 105 ...
 $ H       : int  0 0 0 0 0 0 0 0 131 189 ...
 $ X2B     : int  0 0 0 0 0 0 0 0 27 37 ...
 $ X3B     : int  0 0 0 0 0 0 0 0 6 9 ...
 $ HR      : int  0 0 0 0 0 0 0 0 13 27 ...
 $ RBI     : int  0 0 0 0 0 0 0 0 69 106 ...
 $ SB      : int  0 0 0 0 0 0 0 0 2 3 ...
 $ CS      : int  0 0 0 0 0 0 0 0 2 1 ...
 $ BB      : int  0 0 0 0 0 0 0 0 28 49 ...
 $ SO      : int  0 0 0 1 0 0 0 0 39 61 ...
 $ IBB     : int  0 0 0 0 0 0 0 0 NA 5 ...
 $ HBP     : int  0 0 0 0 0 0 0 0 3 3 ...
 $ SH      : int  0 1 0 0 0 0 0 0 6 7 ...
 $ SF      : int  0 0 0 0 0 0 0 0 4 4 ...
 $ GIDP    : int  0 0 0 0 0 0 0 0 13 20 ...
 $ salary  : int  300000 NA 387500 403250 419000 2750000 500000 NA NA NA ...
> 
> #######################################
> # Average salaries by teams, over years
> #######################################
> 
> require(plyr)
> avesal <- ddply(Salaries, .(yearID, teamID, lgID), summarise, 
+ 	salary= mean(salary)/100000)
> 
> # remove infrequent teams
> tcount <- table(avesal$teamID)
> avesal <- subset(avesal, avesal$teamID %in% names(tcount)[tcount>=15], drop=TRUE)
> avesal$teamID <- factor(avesal$teamID, levels=names(tcount)[tcount>=15])
> 
> require(lattice)
Loading required package: lattice
> xyplot(salary ~ yearID | teamID, data=avesal, ylab="Salary (100,000$)")
> 
> 
> 
> 
> 
> 
> dev.off()
null device 
          1 
>