Last data update: 2014.03.03

R: predict.RWBP
predict.RWBP {RWBP}    R Documentation

predict.RWBP

Description

Predict spatial outliers according to an RWBP model

Usage

## S3 method for class 'RWBP'
predict(object, top_k = 3, type = "raw", ...)

Arguments

object

an RWBP object

top_k

the number of outliers to extract

type

"raw" returns classification results (0 for normal, 1 for outlier). "prob" returns the probability of each record being an outlier.

...

currently not in use

Value

Returns the input data frame/matrix with an additional column that contains the prediction results. The additional column is set according to the type parameter:

raw

"class" column is added

prob

"prob" column is added

Author(s)

Sigal Shaked & Ben Nasi

References

Liu X., Lu C.T., Chen F.: Spatial outlier detection: Random walk based approaches. In: Proceedings of the 18th ACM SIGSPATIAL International Conference on Advances in Geographic Information Systems (ACM GIS), San Jose, CA (2010).

See Also

RWBP, RWBP-package

Examples

#an example dataset:
trainSet <- cbind(
c(7.092073,7.092631,7.09263,7.093052,7.092876,7.092689,7.092515,7.092321,
7.092138,7.11455,7.11441,7.11408,7.11376,7.11338,7.11305,7.11277,7.1124,
7.11202,7.11161,7.11115,7.11068,7.11014,7.10963,7.1095,7.1089,7.10818,
7.10747,7.10674,7.116691,7.116142,7.115559,7.115007,7.114423,7.113838,
7.113272,7.112684,7.112067,7.111458,7.110869,7.110274,7.109696,7.109131,
7.109231,7.108546,7.10797,5.599215,5.597609,5.596588,5.595359,5.594478,5.593652),
c(50.77849,50.77859,50.7786,50.77878,50.77914,50.77952,50.77992,50.78035,
50.78081,53.8,53.7,53.6,53.5,54.2,55.3,55.2,56.6,57.6,57.7,58.8,59.4,59.7,
59,59.03,59.3,60.7,60.8,61.4,50.73922,50.73914,50.73905,50.73899,50.73889,
50.73881,50.73873,50.73865,50.73856,50.73847,50.73838,50.73831,50.73822,
50.73814,50.73937,50.73805,50.73798,43.2034,43.20338,43.20352,43.2037,43.20391,43.20409),
c(106.5,107.6,25,108.5,109.1,109.7,111.6,113.3,113.3,62.3,333.7,331.5,327.2,
325.5,324.8,323.5,322.3,320.3,319,317.8,316,315.1,315.3,12,312.4,311.3,310.8,
309.4,99.2,99.2,101.1,99.5,101.3,105.3,104.3,104.4,106.3,108.8,110.3,111.7,113.3,
112.1,5000,111.6,109.8,125.6,130,132.3,133.4,138,143.4),
c(0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,1,0,0,0,0,0,0,0,0)
)

colnames(trainSet)<- c("lng","lat","alt","isOutlier")

#the first two columns of the input data are assumed to be spatial coordinates,
#and the rest are non-spatial attributes according to which outliers will be extracted
myRW <- RWBP(as.data.frame(trainSet[,1:3]), clusters.iterations=6)

#predict classification:
testPrediction<-predict(myRW,3 )
#calculate accuracy:
sum(testPrediction$class==trainSet[,"isOutlier"])/nrow(trainSet)
#confusion table
table(testPrediction$class, trainSet[,"isOutlier"])

#other options:
myRW1 <- RWBP(isOutlier~lng+lat+alt, data=as.data.frame(trainSet))
#print model summary
print(myRW1)
#plot model graph
plot(myRW1)
#predict probabilities of each record to be an outlier:
predict(myRW1 , top_k=4,type="prob")

Results


R version 3.3.1 (2016-06-21) -- "Bug in Your Hair"
Copyright (C) 2016 The R Foundation for Statistical Computing
Platform: x86_64-pc-linux-gnu (64-bit)

R is free software and comes with ABSOLUTELY NO WARRANTY.
You are welcome to redistribute it under certain conditions.
Type 'license()' or 'licence()' for distribution details.

R is a collaborative project with many contributors.
Type 'contributors()' for more information and
'citation()' on how to cite R or R packages in publications.

Type 'demo()' for some demos, 'help()' for on-line help, or
'help.start()' for an HTML browser interface to help.
Type 'q()' to quit R.

> library(RWBP)
Loading required package: RANN
Loading required package: igraph

Attaching package: 'igraph'

The following objects are masked from 'package:stats':

    decompose, spectrum

The following object is masked from 'package:base':

    union

Loading required package: lsa
Loading required package: SnowballC
> png(filename="/home/ddbj/snapshot/RGM3/R_CC/result/RWBP/predict.RWBP.Rd_%03d_medium.png", width=480, height=480)
> ### Name: predict.RWBP
> ### Title: predict.RWBP
> ### Aliases: predict.RWBP
> ### Keywords: spatial cluster graphs classif
> 
> ### ** Examples
> 
> #an example dataset:
> trainSet <- cbind(
+ c(7.092073,7.092631,7.09263,7.093052,7.092876,7.092689,7.092515,7.092321,
+ 7.092138,7.11455,7.11441,7.11408,7.11376,7.11338,7.11305,7.11277,7.1124,
+ 7.11202,7.11161,7.11115,7.11068,7.11014,7.10963,7.1095,7.1089,7.10818,
+ 7.10747,7.10674,7.116691,7.116142,7.115559,7.115007,7.114423,7.113838,
+ 7.113272,7.112684,7.112067,7.111458,7.110869,7.110274,7.109696,7.109131,
+ 7.109231,7.108546,7.10797,5.599215,5.597609,5.596588,5.595359,5.594478,5.593652),
+ c(50.77849,50.77859,50.7786,50.77878,50.77914,50.77952,50.77992,50.78035,
+ 50.78081,53.8,53.7,53.6,53.5,54.2,55.3,55.2,56.6,57.6,57.7,58.8,59.4,59.7,
+ 59,59.03,59.3,60.7,60.8,61.4,50.73922,50.73914,50.73905,50.73899,50.73889,
+ 50.73881,50.73873,50.73865,50.73856,50.73847,50.73838,50.73831,50.73822,
+ 50.73814,50.73937,50.73805,50.73798,43.2034,43.20338,43.20352,43.2037,43.20391,43.20409),
+ c(106.5,107.6,25,108.5,109.1,109.7,111.6,113.3,113.3,62.3,333.7,331.5,327.2,
+ 325.5,324.8,323.5,322.3,320.3,319,317.8,316,315.1,315.3,12,312.4,311.3,310.8,
+ 309.4,99.2,99.2,101.1,99.5,101.3,105.3,104.3,104.4,106.3,108.8,110.3,111.7,113.3,
+ 112.1,5000,111.6,109.8,125.6,130,132.3,133.4,138,143.4),
+ c(0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,1,0,0,0,0,0,0,0,0)
+ )
> 
> colnames(trainSet)<- c("lng","lat","alt","isOutlier")
> 
> #first to columns of the input data are assumed to be spatial coordinates, 
> #and the rest are non-spatial attributes according to which outliers will be extracted
> myRW <- RWBP(as.data.frame(trainSet[,1:3]), clusters.iterations=6)
> 
> #predict classification:
> testPrediction<-predict(myRW,3 )
> #calculate accuracy:
> sum(testPrediction$class==trainSet[,"isOutlier"])/nrow(trainSet)
[1] 0.9215686
> #confusion table
> table(testPrediction$class, trainSet[,"isOutlier"])
   
     0  1
  0 46  2
  1  2  1
> 
> #other options:
> myRW1 <- RWBP(isOutlier~lng+lat+alt, data=as.data.frame(trainSet))
> #print model summary
> print(myRW1)
 A Random Walk on Bipartite Graph spatial outlier detection model was built: 
 ---------------------------------------------------------------------------- 

 neighberhood size =  10 
 initial clusters amount =  8 
 each process increases clusters amount by  2  more clusters
 clusters iterations amount =  6 
 alfa =  0.5 
 dumping factor =  0.9 
 valid rows =  51  out of  51  input rows (records with empty values were removed) 

 a bipartite graph was built: 
IGRAPH UNWB 129 306 -- 
+ attr: name (v/c), type (v/l), RW.Y (e/n), avgDist (e/n), weight (e/n)
+ edges (vertex names):
 [1] 1 ---3    2 ---3    3 ---4    4 ---3    5 ---3    6 ---3    7 ---6   
 [8] 8 ---6    9 ---6    10---4    11---1    12---1    13---1    14---1   
[15] 15---1    16---1    17---1    18---1    19---1    20---1    21---1   
[22] 22---1    23---1    24---4    25---1    26---1    27---1    28---1   
[29] 29---5    30---5    31---5    32---5    33---5    34---3    35---5   
[36] 36---5    37---3    38---3    39---6    40---6    41---6    42---6   
[43] 43---2    44---6    45---3    46---8    47---8    48---8    49---8   
[50] 50---7    51---7    1 ---1005 2 ---1010 3 ---1006 4 ---1010 5 ---1010
+ ... omitted several edges

 outlier scores:  
      row_num outlierScore
 [1,]      43    0.5828438
 [2,]      10    0.5959659
 [3,]      46    0.6677292
 [4,]      51    0.7448139
 [5,]      50    0.7451195
 [6,]      42    0.7635689
 [7,]      13    0.7637470
 [8,]      28    0.8026196
 [9,]      47    0.9004970
[10,]      48    0.9009951
[11,]      49    0.9020957
[12,]       3    0.9167528
[13,]      11    0.9241027
[14,]      12    0.9242176
[15,]      38    0.9258850
[16,]      45    0.9267420
[17,]      15    0.9268838
[18,]      41    0.9270466
[19,]      40    0.9274883
[20,]      37    0.9275324
[21,]      44    0.9275440
[22,]      39    0.9277220
[23,]      16    0.9285311
[24,]      14    0.9289535
[25,]       4    0.9440723
[26,]       1    0.9441995
[27,]       7    0.9442752
[28,]       8    0.9442924
[29,]       9    0.9442924
[30,]       5    0.9444460
[31,]       6    0.9444807
[32,]       2    0.9452129
[33,]      24    0.9652561
[34,]      26    0.9744550
[35,]      27    0.9751114
[36,]      22    0.9754449
[37,]      21    0.9756838
[38,]      25    0.9757264
[39,]      20    0.9950154
[40,]      18    0.9950273
[41,]      19    0.9950798
[42,]      17    0.9951369
[43,]      34    0.9960694
[44,]      23    0.9962027
[45,]      36    0.9976380
[46,]      32    0.9978886
[47,]      35    0.9979231
[48,]      33    0.9979525
[49,]      31    0.9982022
[50,]      29    0.9982347
[51,]      30    0.9982347
> #plot model graph
> plot(myRW1)
> #predict probabilities of each record to be an outlier:
> predict(myRW1 , top_k=4,type="prob")
        lng      lat    alt         prob
1  7.092073 50.77849  106.5 1.300827e-01
2  7.092631 50.77859  107.6 1.276431e-01
3  7.092630 50.77860   25.0 1.961572e-01
4  7.093052 50.77878  108.5 1.303889e-01
5  7.092876 50.77914  109.1 1.294892e-01
6  7.092689 50.77952  109.7 1.294059e-01
7  7.092515 50.77992  111.6 1.299005e-01
8  7.092321 50.78035  113.3 1.298591e-01
9  7.092138 50.78081  113.3 1.298591e-01
10 7.114550 53.80000   62.3 9.684104e-01
11 7.114410 53.70000  333.7 1.784632e-01
12 7.114080 53.60000  331.5 1.781865e-01
13 7.113760 53.50000  327.2 5.644990e-01
14 7.113380 54.20000  325.5 1.667856e-01
15 7.113050 55.30000  324.8 1.717681e-01
16 7.112770 55.20000  323.5 1.678024e-01
17 7.112400 56.60000  322.3 7.457538e-03
18 7.112020 57.60000  320.3 7.721361e-03
19 7.111610 57.70000  319.0 7.594868e-03
20 7.111150 58.80000  317.8 7.749967e-03
21 7.110680 59.40000  316.0 5.428830e-02
22 7.110140 59.70000  315.1 5.486357e-02
23 7.109630 59.00000  315.3 4.891774e-03
24 7.109500 59.03000   12.0 7.939170e-02
25 7.108900 59.30000  312.4 5.418585e-02
26 7.108180 60.70000  311.3 5.724653e-02
27 7.107470 60.80000  310.8 5.566640e-02
28 7.106740 61.40000  309.4 4.709182e-01
29 7.116691 50.73922   99.2 0.000000e+00
30 7.116142 50.73914   99.2 0.000000e+00
31 7.115559 50.73905  101.1 7.811665e-05
32 7.115007 50.73899   99.5 8.332474e-04
33 7.114423 50.73889  101.3 6.792594e-04
34 7.113838 50.73881  105.3 5.212603e-03
35 7.113272 50.73873  104.3 7.499900e-04
36 7.112684 50.73865  104.4 1.436439e-03
37 7.112067 50.73856  106.3 1.702065e-01
38 7.111458 50.73847  108.8 1.741726e-01
39 7.110869 50.73838  110.3 1.697503e-01
40 7.110274 50.73831  111.7 1.703129e-01
41 7.109696 50.73822  113.3 1.713762e-01
42 7.109131 50.73814  112.1 5.649276e-01
43 7.109231 50.73937 5000.0 1.000000e+00
44 7.108546 50.73805  111.6 1.701787e-01
45 7.107970 50.73798  109.8 1.721094e-01
46 5.599215 43.20340  125.6 7.956493e-01
47 5.597609 43.20338  130.0 2.352909e-01
48 5.596588 43.20352  132.3 2.340919e-01
49 5.595359 43.20370  133.4 2.314422e-01
50 5.594478 43.20391  138.0 6.093423e-01
51 5.593652 43.20409  143.4 6.100780e-01
> 
> 
> 
> 
> 
> dev.off()
null device 
          1 
>