Last data update: 2014.03.03

R: Rearranges and improves the legibility of the output from the...
cp.calcR Documentation

Rearranges and improves the legibility of the output from the stepwise function in S-Plus.

Description

Rearranges and improves the legibility of the output from the stepwise function in S-Plus. The output can be used for the Cp plot. cp.calc works only in S-Plus. Use regsubsets in R. The example below works in both languages.

Usage

cp.calc(sw, data, y.name)

## S3 method for class 'cp.object'
print(x, ...)

## S3 method for class 'cp.object'
x[..., drop = TRUE]

Arguments

sw

Output from the S-Plus stepwise function.

data

Dataset name from which "sw" was calculated.

y.name

Name of response variable for which "sw" was calculated.

x

Object of class "cp.object".

...

Additional arguments to "[" or "print".

drop

Argument to the print function.

Value

"cp.object", which is a data.frame containing information about each model that was attempted with additional attributes: tss total sum of squares, n number of observations, y.name response variable, full.i row name of full model. The columns are

p

number of parameters in the model

cp

Cp statistic

aic

AIC statistic

rss

Residual sum of squares

r2

R^2

r2.adj

Adjusted R^2

xvars

X variables

sw.names

Model name produced by stepwise.

Author(s)

Richard M. Heiberger <rmh@temple.edu>

References

Heiberger, Richard M. and Holland, Burt (2004). Statistical Analysis and Data Display: An Intermediate Course with Examples in S-Plus, R, and SAS. Springer Texts in Statistics. Springer. ISBN 0-387-40270-5.

Examples

## This example is from Section 9.15 of Heiberger and Holland (2004).
data(usair)
if.R(s={usair <- usair}, r={})

splom(~usair, main="U.S. Air Pollution Data with SO2 response", cex=.5)
## export.eps(hh("regb/figure/regb.f1.usair.eps"))

usair$lnSO2 <- log(usair$SO2)
usair$lnmfg <- log(usair$mfgfirms)
usair$lnpopn <- log(usair$popn)

usair[1:3,]   ## lnSO2 is in position 8, SO2 is in position 1
              ## lnmfg is in position 9, lnpopn is in position 10

splom(~usair[, c(8,2,9,10,5:7)],
              main="U.S. Air Pollution Data with 3 log-transformed variables",
              cex=.5)
## export.eps(hh("regb/figure/regb.f2.usair.eps"))

if.R(s={
  usair.step <- stepwise(y=usair$lnSO2,
                         x=usair[, c(2,9,10,5:7)],
                         method="exhaustive",
                         plot=FALSE, nbest=2)
  ## print for pedagogical purposes only.  The plot of cp ~ p is more useful.
  ## The line with rss=1e35 is a stepwise() bug, that we reported to S-Plus.
  print(usair.step, digits=4)
  usair.cp <- cp.calc(usair.step, usair, "lnSO2")
  ## print for pedagogical purposes only.  The plot of cp ~ p is more useful.
  usair.cp
  tmp <- (usair.cp$cp <= 10)
  usair.cp[tmp,]

  old.par <- par(mar=par()$mar+c(0,1,0,0))
  tmp <- (usair.cp$cp <= 10)
  plot(cp ~ p, data=usair.cp[tmp,], ylim=c(0,10), type="n", cex=1.3)
  abline(b=1)
  text(x=usair.cp$p[tmp], y=usair.cp$cp[tmp],
       row.names(usair.cp)[tmp], cex=1.3)
  title(main="Cp plot for usair.dat, Cp<10")
  par(old.par)
## export.eps(hh("regb/figure/regb.f3.usair.eps"))
},r={
  usair.regsubset <- leaps::regsubsets(lnSO2~lnmfg+lnpopn+precip+raindays+temp+wind,
                                       data=usair, nbest=2)
  usair.subsets.Summary <- summaryHH(usair.regsubset)
  tmp <- (usair.subsets.Summary$cp <= 10)
  usair.subsets.Summary[tmp,]
  plot(usair.subsets.Summary[tmp,], statistic='cp', legend=FALSE)

  usair.lm7 <- lm.regsubsets(usair.regsubset, 7)
  anova(usair.lm7)
  summary(usair.lm7)
})

vif(lnSO2 ~ temp + lnmfg + lnpopn + wind + precip + raindays, data=usair)

vif(lnSO2 ~ temp + lnmfg + wind + precip, data=usair)

usair.lm <- lm(lnSO2 ~ temp + lnmfg + wind + precip, data=usair)
anova(usair.lm)
summary(usair.lm, corr=FALSE)

Results


R version 3.3.1 (2016-06-21) -- "Bug in Your Hair"
Copyright (C) 2016 The R Foundation for Statistical Computing
Platform: x86_64-pc-linux-gnu (64-bit)

R is free software and comes with ABSOLUTELY NO WARRANTY.
You are welcome to redistribute it under certain conditions.
Type 'license()' or 'licence()' for distribution details.

R is a collaborative project with many contributors.
Type 'contributors()' for more information and
'citation()' on how to cite R or R packages in publications.

Type 'demo()' for some demos, 'help()' for on-line help, or
'help.start()' for an HTML browser interface to help.
Type 'q()' to quit R.

> library(HH)
Loading required package: lattice
Loading required package: grid
Loading required package: latticeExtra
Loading required package: RColorBrewer
Loading required package: multcomp
Loading required package: mvtnorm
Loading required package: survival
Loading required package: TH.data
Loading required package: MASS

Attaching package: 'TH.data'

The following object is masked from 'package:MASS':

    geyser

Loading required package: gridExtra
> png(filename="/home/ddbj/snapshot/RGM3/R_CC/result/HH/cp.Rd_%03d_medium.png", width=480, height=480)
> ### Name: cp.calc
> ### Title: Rearranges and improves the legibility of the output from the
> ###   stepwise function in S-Plus.
> ### Aliases: cp.calc print.cp.object [.cp.object
> ### Keywords: regression
> 
> ### ** Examples
> 
> ## This example is from Section 9.15 of Heiberger and Holland (2004).
> data(usair)
> if.R(s={usair <- usair}, r={})
NULL
> 
> splom(~usair, main="U.S. Air Pollution Data with SO2 response", cex=.5)
> ## export.eps(hh("regb/figure/regb.f1.usair.eps"))
> 
> usair$lnSO2 <- log(usair$SO2)
> usair$lnmfg <- log(usair$mfgfirms)
> usair$lnpopn <- log(usair$popn)
> 
> usair[1:3,]   ## lnSO2 is in position 8, SO2 is in position 1
  SO2 temp mfgfirms popn wind precip raindays    lnSO2    lnmfg   lnpopn
1  10 70.3      213  582  6.0   7.05       36 2.302585 5.361292 6.366470
2  13 61.0       91  132  8.2  48.52      100 2.564949 4.510860 4.882802
3  12 56.7      453  716  8.7  20.66       67 2.484907 6.115892 6.573680
>               ## lnmfg is in position 9, lnpopn is in position 10
> 
> splom(~usair[, c(8,2,9,10,5:7)],
+               main="U.S. Air Pollution Data with 3 log-transformed variables",
+               cex=.5)
> ## export.eps(hh("regb/figure/regb.f2.usair.eps"))
> 
> if.R(s={
+   usair.step <- stepwise(y=usair$lnSO2,
+                          x=usair[, c(2,9,10,5:7)],
+                          method="exhaustive",
+                          plot=FALSE, nbest=2)
+   ## print for pedagogical purposes only.  The plot of cp ~ p is more useful.
+   ## The line with rss=1e35 is a stepwise() bug, that we reported to S-Plus.
+   print(usair.step, digits=4)
+   usair.cp <- cp.calc(usair.step, usair, "lnSO2")
+   ## print for pedagogical purposes only.  The plot of cp ~ p is more useful.
+   usair.cp
+   tmp <- (usair.cp$cp <= 10)
+   usair.cp[tmp,]
+ 
+   old.par <- par(mar=par()$mar+c(0,1,0,0))
+   tmp <- (usair.cp$cp <= 10)
+   plot(cp ~ p, data=usair.cp[tmp,], ylim=c(0,10), type="n", cex=1.3)
+   abline(b=1)
+   text(x=usair.cp$p[tmp], y=usair.cp$cp[tmp],
+        row.names(usair.cp)[tmp], cex=1.3)
+   title(main="Cp plot for usair.dat, Cp<10")
+   par(old.par)
+ ## export.eps(hh("regb/figure/regb.f3.usair.eps"))
+ },r={
+   usair.regsubset <- leaps::regsubsets(lnSO2~lnmfg+lnpopn+precip+raindays+temp+wind,
+                                        data=usair, nbest=2)
+   usair.subsets.Summary <- summaryHH(usair.regsubset)
+   tmp <- (usair.subsets.Summary$cp <= 10)
+   usair.subsets.Summary[tmp,]
+   plot(usair.subsets.Summary[tmp,], statistic='cp', legend=FALSE)
+ 
+   usair.lm7 <- lm.regsubsets(usair.regsubset, 7)
+   anova(usair.lm7)
+   summary(usair.lm7)
+ })

Call:
lm(formula = lnSO2 ~ lnmfg + precip + temp + wind, data = usair)

Residuals:
    Min      1Q  Median      3Q     Max 
-0.8965 -0.3405 -0.0854  0.2963  1.0321 

Coefficients:
             Estimate Std. Error t value Pr(>|t|)    
(Intercept)  6.891385   1.070089   6.440  1.8e-07 ***
lnmfg        0.239991   0.086767   2.766  0.00890 ** 
precip       0.019299   0.007378   2.616  0.01293 *  
temp        -0.073038   0.012834  -5.691  1.8e-06 ***
wind        -0.184368   0.062025  -2.972  0.00524 ** 
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 0.5006 on 36 degrees of freedom
Multiple R-squared:  0.5427,	Adjusted R-squared:  0.4919 
F-statistic: 10.68 on 4 and 36 DF,  p-value: 8.233e-06

> 
> vif(lnSO2 ~ temp + lnmfg + lnpopn + wind + precip + raindays, data=usair)
    temp    lnmfg   lnpopn     wind   precip raindays 
4.152687 5.322922 5.411364 1.349454 3.741815 3.540797 
> 
> vif(lnSO2 ~ temp + lnmfg + wind + precip, data=usair)
    temp    lnmfg     wind   precip 
1.373342 1.115383 1.253309 1.204027 
> 
> usair.lm <- lm(lnSO2 ~ temp + lnmfg + wind + precip, data=usair)
> anova(usair.lm)
Analysis of Variance Table

Response: lnSO2
          Df Sum Sq Mean Sq F value    Pr(>F)    
temp       1 5.8914  5.8914 23.5088 2.384e-05 ***
lnmfg      1 1.3007  1.3007  5.1904   0.02875 *  
wind       1 1.8005  1.8005  7.1845   0.01102 *  
precip     1 1.7145  1.7145  6.8415   0.01293 *  
Residuals 36 9.0218  0.2506                      
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
> summary(usair.lm, corr=FALSE)

Call:
lm(formula = lnSO2 ~ temp + lnmfg + wind + precip, data = usair)

Residuals:
    Min      1Q  Median      3Q     Max 
-0.8965 -0.3405 -0.0854  0.2963  1.0321 

Coefficients:
             Estimate Std. Error t value Pr(>|t|)    
(Intercept)  6.891385   1.070089   6.440  1.8e-07 ***
temp        -0.073038   0.012834  -5.691  1.8e-06 ***
lnmfg        0.239991   0.086767   2.766  0.00890 ** 
wind        -0.184368   0.062025  -2.972  0.00524 ** 
precip       0.019299   0.007378   2.616  0.01293 *  
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 0.5006 on 36 degrees of freedom
Multiple R-squared:  0.5427,	Adjusted R-squared:  0.4919 
F-statistic: 10.68 on 4 and 36 DF,  p-value: 8.233e-06

> 
> 
> 
> 
> 
> dev.off()
null device 
          1 
>