Last data update: 2014.03.03

R: Parsing gzipped GSOD dataset.
build_gsod_forcing_data    R Documentation

Parsing gzipped GSOD dataset.

Description

Parses the gzipped GSOD forcing data (*.op.gz files), as returned from get_gsod_stn(), into a single data frame of daily records.

Usage

build_gsod_forcing_data()

Author(s)

Daniel R. Fuka

Examples

#
# After running get_gsod_stn() with addis as a temp directory then:
#

## Not run: 
tmp_ppt<-build_gsod_forcing_data()
addis

## End(Not run)
## The function is currently defined as
function(){
  # Build a daily forcing data frame from gzipped GSOD files (*.op.gz).
  #
  # Prompts for the directory holding the files, reads every file whose name
  # ends in ".op.gz", extracts station id, date, max/min temperature and
  # precipitation from fixed column positions, and merges the records onto a
  # complete daily calendar running from 1 January of the earliest file year
  # to 31 December of the latest file year.
  #
  # Returns: a data.frame with columns fdate, stn, ppt, tmax, tmin (as read
  #   from the files) plus the derived ppt_mm, tmax_C, tmin_C and tavg_C.
  #   Calendar days without a record are present with NA values.
  # Raises: an error if no *.op.gz file is found in the directory.
  tmpdir <- readline(paste0(
    "Please enter a temp directory where you stored the\n",
    "  *.op.gz datafiles? \n"
  ))

  # `pattern` is a regular expression, not a glob: anchor it so that only
  # names really ending in ".op.gz" are picked up.
  files <- dir(tmpdir, pattern = "\\.op\\.gz$", full.names = TRUE)
  if (length(files) == 0) {
    stop("No *.op.gz files found in directory: ", tmpdir, call. = FALSE)
  }

  # The four characters immediately before ".op.gz" are the file's year.
  name_len <- nchar(files)
  file_years <- substr(files, name_len - 9, name_len - 6)
  alldates <- data.frame(
    fdate = seq(
      from = as.Date(paste0(min(file_years), "-01-01")),
      to = as.Date(paste0(max(file_years), "-12-31")),
      by = 1
    )
  )

  # Read one gz file and drop the header line(s), identified by "MAX".
  # The connection is closed explicitly so it does not leak.
  read_records <- function(path) {
    con <- gzfile(path, open = "rt")
    on.exit(close(con), add = TRUE)
    grep("MAX", readLines(con), value = TRUE, invert = TRUE)
  }
  records <- unlist(lapply(files, read_records), use.names = FALSE)
  if (is.null(records)) {
    records <- character(0)
  }

  #
  # There is more data in this dataset we can extract later as we need it.
  #
  # NOTE(review): the station id is taken from columns 1-5 as in the
  # original code; confirm against the GSOD layout whether 1-6 is intended.
  forcing <- data.frame(
    stn = as.numeric(substring(records, 1, 5)),
    ppt = as.numeric(substring(records, 119, 123)),
    tmax = as.numeric(substring(records, 103, 108)),
    tmin = as.numeric(substring(records, 111, 116)),
    fdate = as.Date(substring(records, 15, 22), format = "%Y%m%d")
  )
  forcing <- na.omit(forcing)
  forcing <- merge(alldates, forcing, all = TRUE)

  # GSOD flags a missing temperature as 9999.9; turn the sentinel into NA
  # instead of converting it to a bogus Celsius value. This is done after
  # the merge so a record with valid precipitation is not dropped.
  forcing$tmax[which(forcing$tmax > 9999)] <- NA
  forcing$tmin[which(forcing$tmin > 9999)] <- NA

  # Unit conversions: inches -> mm, Fahrenheit -> Celsius.
  forcing$ppt_mm <- forcing$ppt * 25.4
  forcing$tmax_C <- (forcing$tmax - 32) * 5 / 9
  forcing$tmin_C <- (forcing$tmin - 32) * 5 / 9
  forcing$tavg_C <- (forcing$tmin_C + forcing$tmax_C) / 2

  # A missing precipitation value (99.99 in) exceeds 999 mm once converted
  # and is treated as zero rainfall, as before. which() skips NA days.
  forcing$ppt_mm[which(forcing$ppt_mm > 999)] <- 0.0
  forcing
}

Results


R version 3.3.1 (2016-06-21) -- "Bug in Your Hair"
Copyright (C) 2016 The R Foundation for Statistical Computing
Platform: x86_64-pc-linux-gnu (64-bit)

R is free software and comes with ABSOLUTELY NO WARRANTY.
You are welcome to redistribute it under certain conditions.
Type 'license()' or 'licence()' for distribution details.

R is a collaborative project with many contributors.
Type 'contributors()' for more information and
'citation()' on how to cite R or R packages in publications.

Type 'demo()' for some demos, 'help()' for on-line help, or
'help.start()' for an HTML browser interface to help.
Type 'q()' to quit R.

> library(EcoHydRology)
Loading required package: operators

Attaching package: 'operators'

The following objects are masked from 'package:base':

    options, strrep

Loading required package: topmodel
Loading required package: DEoptim

DEoptim package
Differential Evolution algorithm in R
Authors: D. Ardia, K. Mullen, B. Peterson and J. Ulrich

Loading required package: XML
> png(filename="/home/ddbj/snapshot/RGM3/R_CC/result/EcoHydRology/build_gsod_forcing_data.Rd_%03d_medium.png", width=480, height=480)
> ### Name: build_gsod_forcing_data
> ### Title: Parsing gzipped GSOD dataset.
> ### Aliases: build_gsod_forcing_data
> 
> ### ** Examples
> 
> #
> # After running get_gsod_stn() with addis as a temp directory then:
> #
> 
> ## Not run: 
> ##D tmp_ppt<-build_gsod_forcing_data()
> ##D addis
> ## End(Not run)
> ## The function is currently defined as
> function(){
+   tmpdir=readline("Please enter a temp directory where you stored the
+   *.op.gz datafiles? \n")
+   files=dir(tmpdir,"*.op.gz",full.names=T)
+   start_year=min(substr(files,nchar(files)-9,nchar(files)-6))
+   end_year=max(substr(files,nchar(files)-9,nchar(files)-6))
+   alldates=data.frame(fdate=seq(from=as.Date(paste(start_year,"-01-01",
+   sep="")), to=as.Date(paste(end_year,"-12-31",sep="")), by=1))
+ 
+   stn=matrix()
+   tmin=matrix()
+   tmax=matrix()
+   ppt=matrix()
+   fdate=matrix()
+   for (tmpfile in files){
+ #
+ # There is more data in this dataset we can extract later as we need it.
+ #
+ #
+         tmpstring<-grep("MAX",readLines( gzfile(tmpfile)),value=TRUE,invert=TRUE)
+         stn<-c(stn,as.numeric(as.character(substring(tmpstring,1,5))))
+         tmax<-c(tmax,as.numeric(as.character(substring(tmpstring,103,108))))
+         tmin<-c(tmin,as.numeric(as.character(substring(tmpstring,111,116))))
+         ppt<-c(ppt,as.numeric(as.character(substring(tmpstring,119,123))))
+         fdate<-c(fdate,as.Date(yearmoda<-substring(tmpstring,15,22),
+ 		"%Y%m%d"))
+   }
+ 
+   stn<-as.numeric(stn)
+   ppt<-as.numeric(ppt)
+   tmax<-as.numeric(tmax)
+   tmin<-as.numeric(tmin)
+   fdate<-as.Date(as.numeric(fdate), origin="1970-01-01")
+   forcing=data.frame(stn=stn,ppt=ppt,tmax=tmax,tmin=tmin,
+   fdate=as.Date(fdate))
+   forcing=na.omit(forcing)
+   forcing=merge(alldates,forcing,all=TRUE)
+ 
+   forcing$ppt_mm <- forcing$ppt*25.4
+   forcing$tmax_C <- (forcing$tmax-32) * 5/9
+   forcing$tmin_C <- (forcing$tmin-32) * 5/9
+   forcing$tavg_C <-(forcing$tmin_C+forcing$tmax_C)/2
+   forcing$ppt_mm[forcing$ppt_mm > 999]=0.0
+   return(forcing)
+   }
function () 
{
    tmpdir = readline("Please enter a temp directory where you stored the\n  *.op.gz datafiles? \n")
    files = dir(tmpdir, "*.op.gz", full.names = T)
    start_year = min(substr(files, nchar(files) - 9, nchar(files) - 
        6))
    end_year = max(substr(files, nchar(files) - 9, nchar(files) - 
        6))
    alldates = data.frame(fdate = seq(from = as.Date(paste(start_year, 
        "-01-01", sep = "")), to = as.Date(paste(end_year, "-12-31", 
        sep = "")), by = 1))
    stn = matrix()
    tmin = matrix()
    tmax = matrix()
    ppt = matrix()
    fdate = matrix()
    for (tmpfile in files) {
        tmpstring <- grep("MAX", readLines(gzfile(tmpfile)), 
            value = TRUE, invert = TRUE)
        stn <- c(stn, as.numeric(as.character(substring(tmpstring, 
            1, 5))))
        tmax <- c(tmax, as.numeric(as.character(substring(tmpstring, 
            103, 108))))
        tmin <- c(tmin, as.numeric(as.character(substring(tmpstring, 
            111, 116))))
        ppt <- c(ppt, as.numeric(as.character(substring(tmpstring, 
            119, 123))))
        fdate <- c(fdate, as.Date(yearmoda <- substring(tmpstring, 
            15, 22), "%Y%m%d"))
    }
    stn <- as.numeric(stn)
    ppt <- as.numeric(ppt)
    tmax <- as.numeric(tmax)
    tmin <- as.numeric(tmin)
    fdate <- as.Date(as.numeric(fdate), origin = "1970-01-01")
    forcing = data.frame(stn = stn, ppt = ppt, tmax = tmax, tmin = tmin, 
        fdate = as.Date(fdate))
    forcing = na.omit(forcing)
    forcing = merge(alldates, forcing, all = TRUE)
    forcing$ppt_mm <- forcing$ppt * 25.4
    forcing$tmax_C <- (forcing$tmax - 32) * 5/9
    forcing$tmin_C <- (forcing$tmin - 32) * 5/9
    forcing$tavg_C <- (forcing$tmin_C + forcing$tmax_C)/2
    forcing$ppt_mm[forcing$ppt_mm > 999] = 0
    return(forcing)
}
> 
> 
> 
> 
> 
> dev.off()
null device 
          1 
>