Last data update: 2014.03.03

R: Negative binomial (NB2): generic synthetic negative binomial...
nb2_synR Documentation

Negative binomial (NB2): generic synthetic negative binomial data and model

Description

nb2_syn is a generic function for developing synthetic NB2 data and a model given user defined specifications.

Usage

nb2_syn(nobs = 50000, off = 0, alpha = 1, xv = c(1, 0.75, -1.5))

Arguments

nobs

number of observations in model, Default is 50000

alpha

NB2 heterogeneity or ancillary parameter

off

optional: log of offset variable

xv

predictor coefficient values. First argument is intercept. Use as xv = c(intercept , x1_coef, x2_coef, ...)

Details

Create a synthetic negative binomial (NB2) regression model using the appropriate arguments. Model data with predictors indicated as a group with a period (.). Offset optional. If no offset is desired, drop "off= loff" from nb2_syn function call and "+ loff" from glm.nb function call. See examples.

Data can be estimated using the glm.nb() function, or the ml.nb2() function in the COUNT package, or by using the gamlss function in the gamlss package, with "family=NBI" option.

Value

nby

Negative binomial response; number of counts

sim.data

synthetic data set

Author(s)

Andrew Robinson, Universty of Melbourne, Australia, and Joseph M. Hilbe, Arizona State University, Jet Propulsion Laboratory, California Institute of Technology

References

Hilbe, J.M. (2011), Negative Binomial Regression, second edition, Cambridge University Press.

See Also

poisson_syn, nb1_syn, nbc_syn

Examples

library(MASS)           

sim.data <- nb2_syn(nobs = 500, alpha = .5, xv = c(2, .75, -1.25))
mynb2 <- glm.nb(nby ~ . , data = sim.data)
summary(mynb2)
confint(mynb2)

# with offset
oset <- rep(1:5, each=100, times=1)*100 
loff <- log(oset)   
sim.data <- nb2_syn(nobs = 500, off = loff, alpha = .5, xv = c(1.2, -.75, .25, -1.3))
mypof <- glm.nb(nby ~ . + loff, data = sim.data)
summary(mypof)
confint(mypof)

# without offset, exponentiated coefficients, CI's
sim.data <- nb2_syn(nobs = 500, alpha = .75, xv = c(1, .5, -1.4))
mynbf <- glm.nb(nby ~ . , data = sim.data)
exp(coef(mynbf))
exp(confint(mynbf))

## Not run: 
# default, without offset
sim.data <- nb2_syn()
dnb2 <- glm.nb(nby ~ . , data = sim.data)
summary(dnb2)

## End(Not run)

# use ml.nb2.r function
sim.data <- nb2_syn(nobs = 500, alpha = .5, xv = c(2, .75, -1.25))
mynb2x <- ml.nb2(nby ~ . , data = sim.data)
mynb2x

## Not run: 
# use gamlss function for modeling data after sim.data created
library(gamlss)
sim.data <- nb2_syn(nobs = 500, alpha = .5, xv = c(2, .75, -1.25))
gamnb <- gamlss(nby ~ ., family=NBI, data = sim.data)
gamnb

## End(Not run)

Results


R version 3.3.1 (2016-06-21) -- "Bug in Your Hair"
Copyright (C) 2016 The R Foundation for Statistical Computing
Platform: x86_64-pc-linux-gnu (64-bit)

R is free software and comes with ABSOLUTELY NO WARRANTY.
You are welcome to redistribute it under certain conditions.
Type 'license()' or 'licence()' for distribution details.

R is a collaborative project with many contributors.
Type 'contributors()' for more information and
'citation()' on how to cite R or R packages in publications.

Type 'demo()' for some demos, 'help()' for on-line help, or
'help.start()' for an HTML browser interface to help.
Type 'q()' to quit R.

> library(COUNT)
Loading required package: msme
Loading required package: MASS
Loading required package: lattice
Loading required package: sandwich
> png(filename="/home/ddbj/snapshot/RGM3/R_CC/result/COUNT/nb2_syn.Rd_%03d_medium.png", width=480, height=480)
> ### Name: nb2_syn
> ### Title: Negative binomial (NB2): generic synthetic negative binomial
> ###   data and model
> ### Aliases: nb2_syn
> ### Keywords: models negative binomial
> 
> ### ** Examples
> 
> library(MASS)           
> 
> sim.data <- nb2_syn(nobs = 500, alpha = .5, xv = c(2, .75, -1.25))
> mynb2 <- glm.nb(nby ~ . , data = sim.data)
> summary(mynb2)

Call:
glm.nb(formula = nby ~ ., data = sim.data, init.theta = 0.5066382356, 
    link = log)

Deviance Residuals: 
    Min       1Q   Median       3Q      Max  
-2.7167  -1.1999  -0.4804   0.1921   2.4216  

Coefficients:
            Estimate Std. Error z value Pr(>|z|)    
(Intercept)  2.02280    0.06735   30.04   <2e-16 ***
x1           0.78370    0.06499   12.06   <2e-16 ***
x2          -1.24270    0.06823  -18.21   <2e-16 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

(Dispersion parameter for Negative Binomial(0.5066) family taken to be 1)

    Null deviance: 1000.4  on 499  degrees of freedom
Residual deviance:  543.8  on 497  degrees of freedom
AIC: 3068.4

Number of Fisher Scoring iterations: 1


              Theta:  0.5066 
          Std. Err.:  0.0363 

 2 x log-likelihood:  -3060.3510 
> confint(mynb2)
Waiting for profiling to be done...
                2.5 %     97.5 %
(Intercept)  1.892977  2.1569546
x1           0.655591  0.9132536
x2          -1.377577 -1.1111705
> 
> # with offset
> oset <- rep(1:5, each=100, times=1)*100 
> loff <- log(oset)   
> sim.data <- nb2_syn(nobs = 500, off = loff, alpha = .5, xv = c(1.2, -.75, .25, -1.3))
> mypof <- glm.nb(nby ~ . + loff, data = sim.data)
> summary(mypof)

Call:
glm.nb(formula = nby ~ . + loff, data = sim.data, init.theta = 0.5171748671, 
    link = log)

Deviance Residuals: 
    Min       1Q   Median       3Q      Max  
-3.0201  -1.1514  -0.5155   0.0829   3.8578  

Coefficients:
            Estimate Std. Error z value Pr(>|z|)    
(Intercept)  1.21355    0.61322   1.979   0.0478 *  
x1          -0.75916    0.06284 -12.080  < 2e-16 ***
x2           0.36598    0.06291   5.817 5.99e-09 ***
x3          -1.42552    0.06259 -22.775  < 2e-16 ***
loff         1.02801    0.10967   9.374  < 2e-16 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

(Dispersion parameter for Negative Binomial(0.5172) family taken to be 1)

    Null deviance: 1406.74  on 499  degrees of freedom
Residual deviance:  624.85  on 495  degrees of freedom
AIC: 7779.2

Number of Fisher Scoring iterations: 1


              Theta:  0.5172 
          Std. Err.:  0.0278 

 2 x log-likelihood:  -7767.1770 
> confint(mypof)
Waiting for profiling to be done...
                  2.5 %     97.5 %
(Intercept)  0.02398871  2.4458868
x1          -0.87512825 -0.6421124
x2           0.23978825  0.4919452
x3          -1.54746107 -1.3034235
loff         0.80892575  1.2420037
> 
> # without offset, exponentiated coefficients, CI's
> sim.data <- nb2_syn(nobs = 500, alpha = .75, xv = c(1, .5, -1.4))
> mynbf <- glm.nb(nby ~ . , data = sim.data)
> exp(coef(mynbf))
(Intercept)          x1          x2 
   2.550626    1.490048    0.250432 
> exp(confint(mynbf))
Waiting for profiling to be done...
                2.5 %    97.5 %
(Intercept) 2.2475720 2.8978074
x1          1.3171397 1.6885148
x2          0.2193185 0.2844587
> 
> ## Not run: 
> ##D # default, without offset
> ##D sim.data <- nb2_syn()
> ##D dnb2 <- glm.nb(nby ~ . , data = sim.data)
> ##D summary(dnb2)
> ## End(Not run)
> 
> # use ml.nb2.r function
> sim.data <- nb2_syn(nobs = 500, alpha = .5, xv = c(2, .75, -1.25))
> mynb2x <- ml.nb2(nby ~ . , data = sim.data)
> mynb2x
              Estimate         SE         Z        LCL        UCL
(Intercept)  2.0117127 0.06551303  30.70706  1.8833072  2.1401183
x1           0.7856796 0.06729850  11.67455  0.6537745  0.9175846
x2          -1.3916874 0.06869013 -20.26037 -1.5263201 -1.2570548
alpha        1.7901299 0.13104064  13.66088  1.5332903  2.0469696
> 
> ## Not run: 
> ##D # use gamlss function for modeling data after sim.data created
> ##D library(gamlss)
> ##D sim.data <- nb2_syn(nobs = 500, alpha = .5, xv = c(2, .75, -1.25))
> ##D gamnb <- gamlss(nby ~ ., family=NBI, data = sim.data)
> ##D gamnb
> ## End(Not run)
> 
> 
> 
> 
> 
> dev.off()
null device 
          1 
>