Attachment 'acopula.r'
Download 1 # ------------------------------------------ function repository
2
3 # --- utility functions (move to utils.r afterwards)
4
5 #numerical (linear) approximation of partial derivatives of a real function
6 #fun can have arbitrary number of arguments, as well as derivatives can by of arbitrary order
7 #'order' is a sequence of the same length as is the number of arguments
8 #'difference' controls accuracy, 'area' allows for one-sided derivatives
9 #Example: nderive(function(x,y) x^2*y,c(5,11),c(2,0)) #22
10 #fun arguments can be sequence as well as vector
11 nderive <- function(fun,point=c(0),order=c(1),difference=1e-4,area=c(0,1,-1)) {
12 diffup <- difference*(1+sign(area[1]))/2; difflo <- difference*(1-sign(area[1]))/2
13 ind <- t(expand.grid(lapply(order,function(x) seq(0,x,1))))
14 arg <- point + (order-ind)*diffup - ind*difflo
15 if(length(formals(fun)) == length(point)) { #treatment of arguments in sequence e.g. fun(x,y) instead of fun(c(x,y))
16 rownames(arg)<-NULL #due to unwanted passing of argument names in do.call
17 func <- function(x) do.call(fun,as.list(x))
18 }
19 else func <- fun
20 sum(
21 (-1)^apply(ind,2,sum) *
22 apply(choose(order,ind),2,prod) *
23 apply(arg,2,func)
24 ) / (diffup+difflo)^sum(order)
25 }
26
27 #numerical integration of arbitrarily dimensional function
28 #arguments can form either vector or sequence
29 #possible to reduce integral with some bounds being equal
30 nintegrate <- function(fun, lower, upper, subdivisions=100, differences=(upper-lower)/subdivisions) {
31 argseq <- mapply(seq.int,from=lower,to=upper,by=differences,SIMPLIFY=F,USE.NAMES=F) #make sequence
32 #argseq <- mapply(function(x,y) unique(c(x,y)),argseq,upper,SIMPLIFY=F,USE.NAMES=F) #add upper limits to the sequence and remove if double
33 differences <- rep(differences,length.out=length(argseq)) #ensure differences has the proper length
34 indargseq <- lapply(argseq,seq_along) #indices of the sequence values
35 nind <- sapply(indargseq,length) #number of grid nodes in each direction
36 argsgrid <- as.matrix(expand.grid(argseq)); dimnames(argsgrid) <- NULL #grid nodes
37 indargsgrid <- as.matrix(expand.grid(indargseq)); dimnames(indargsgrid) <- NULL #and their indices
38 mult <- apply(indargsgrid,1,function(x) 2^sum(x>1 & x<nind)) #multiple of each node
39 if(length(formals(fun)) > 1) func <- function(x) do.call(fun,as.list(x)) #treatment of arguments in sequence e.g. fun(x,y) instead of fun(c(x,y))
40 else func <- fun
41 fvalues <- apply(argsgrid,1,func) # compute function values in each node
42 prod(differences[nind>1])*sum(fvalues*mult)/2^length(argseq[nind>1]) #final magic
43 }
44
45 #splits vector x to subvectors of specified lengths; create list or simplify to matrix if possible (identical lengths)
46 vpartition <- function(x, lengths, matrixify=TRUE) {
47 n <- length(lengths)
48 up <- cumsum(lengths)
49 lo <- c(0,up[-n])
50 sapply(mapply(function(i,j) c(x[1],x)[i:j], lo+1, up+1, SIMPLIFY = F),function(y) y[-1],simplify=matrixify)#treats zero lengths
51 }
52
53 #CDF of 4-parametric Pareto distribution (type IV), pars[1:3] > 0, location pars[4] in R
54 pPareto <- function(t,pars) ifelse(t>=pars[4], 1-(1+((t-pars[4])/pars[1])^(1/pars[3]))^(-pars[2]), 0)
55 qPareto <- function(t,pars) pars[1]*((1-t)^(-1/pars[2])-1)^(pars[3])+pars[4]
56
57 # --- copula-related functions
58
59 #create function from 'base' and feed with 'data' (both being matrix or data frame)
60 pCopulaEmpirical <- function(data,base=data) {
61 data <- rbind(data, deparse.level=0 ) #ensure data is matrix/data.frame
62 fCe <- function(x) sum(apply(t(base)<=x,2,prod))
63 apply(data,1,fCe)/nrow(base)
64 }
65
66 genLog <- function(...) {
67 output <- list(
68 parameters = NULL,
69 pcopula = function(t, pars) prod(t),
70 dcopula = function(t, pars) 1,
71 rcopula = function(dim, pars) runif(dim),
72 gen = function(t, pars) -log(t),
73 gen.der = function(t, pars) -1/t,
74 gen.der2 = function(t, pars) 1/t^2,
75 gen.inv = function(t, pars) exp(-t),
76 gen.inv.der = function(t, pars) -exp(-t),
77 gen.inv.der2 = function(t, pars) exp(-t),
78 lower = NULL,
79 upper = NULL,
80 id="log"
81 )
82 output[names(list(...))] <- list(...) #allow user to modify(=replace) definition
83 output
84 }
85
86 genGumbel <- function(...) {
87 output <- list(
88 parameters = c(4),
89 pcopula = function(t, pars) exp(-sum((-log(t))^pars[1])^(1/pars[1])),
90 gen = function(t, pars) (-log(t))^pars[1],
91 gen.der = function(t, pars) -pars[1]*(-log(t))^(pars[1]-1)/t,
92 gen.der2 = function(t, pars) pars[1]*(-log(t))^(pars[1]-2)*(pars[1]-1-log(t))/t^2,
93 gen.inv = function(t, pars) exp(-t^(1/pars[1])),
94 gen.inv.der = function(t, pars) -exp(-t^(1/pars[1]))*t^(1/pars[1]-1)/pars[1],
95 gen.inv.der2 = function(t, pars) exp(-t^(1/pars[1]))*t^(1/pars[1]-2)*(pars[1]+t^(1/pars[1])-1)/pars[1]^2,
96 kendall = list(coef=function(t) 1-1/t, icoef=function(t) 1/(1-t), bounds=c(0,1)),
97 spearman = list(coef=function(t) pPareto(t,c(1.41917,2.14723,1,1)), icoef=function(t) qPareto(t,c(1.41917,2.14723,1,1)), bounds=c(0,1)),
98 lower = 1, #Pi, g(t)=-ln(t)
99 upper = Inf, #M
100 id="Gumbel"
101 )
102 output[names(list(...))] <- list(...)
103 output
104 }
105
106 #to fix: g(u)/(g(u)+g(v)) sends NaN #edit: still true?
107 genClayton <- function(...) {
108 output <- list(
109 parameters = c(2),
110 gen = function(t, pars) t^(-pars[1]) - 1,
111 gen.der = function(t,pars) -pars[1]*t^(-pars[1] - 1),
112 gen.der2 = function(t, pars) pars[1]*(1+pars[1])*t^(-2-pars[1]),
113 gen.inv = function(t, pars) (1+t)^(-1/pars[1]),
114 gen.inv.der = function(t, pars) -(1+t)^(-1-1/pars[1])/pars[1],
115 gen.inv.der2 = function(t, pars) (1 + 1/pars[1])*(1 + t)^(-2 - 1/pars[1])/pars[1],
116 kendall = list(coef=function(t) t/(t+2), icoef=function(t) 2*t/(1-t), bounds=c(0,1)),
117 spearman = list(coef=function(t) pPareto(t,c(0.496534,1.95277,0.986814,0)), icoef=function(t) qPareto(t,c(0.496534,1.95277,0.986814,0)), bounds=c(0,1)),
118 lower = 0.01, upper = Inf,
119 id="Clayton"
120 )
121 output[names(list(...))] <- list(...)
122 output
123 }
124
125 genFrank <- function(...) {
126 output <- list(
127 parameters = c(5),
128 gen = function(t, pars) if(abs(pars[1]) < 0.00001) return(-log(t)) else -log( (exp(-pars[1]*t)-1)/(exp(-pars[1])-1) ),
129 gen.der = function(t, pars) if(abs(pars[1]) < 0.00001) return(-1/t) else pars[1]*exp(-pars[1]*t)/(exp(-pars[1]*t)-1),
130 gen.der2 = function(t, pars) if(abs(pars[1]) < 0.00001) return(1/t^2) else pars[1]^2*exp(pars[1]*t)/(1-exp(pars[1]*t))^2,
131 gen.inv = function(t, pars) if(abs(pars[1]) < 0.00001) return(exp(-t)) else -log(1-exp(-t)+exp(-t-pars[1]))/pars[1],
132 gen.inv.der = function(t, pars) if(abs(pars[1]) < 0.00001) return(-exp(-t)) else -exp(-t)/(-exp(-t)+1/(1-exp(-pars[1])))/pars[1],
133 gen.inv.der2 = function(t, pars) if(abs(pars[1]) < 0.00001) return(exp(-t)) else (exp(pars[1]+t)*(-1+exp(pars[1])))/((1-exp(pars[1])+exp(pars[1]+t))^2*pars[1]),
134 kendall = list(coef=function(t) pPareto(t,c(7.46846,1.25305,0.854724,0)), icoef=function(t) qPareto(t,c(7.46846,1.25305,0.854724,0)), bounds=c(0,1)), #only positive dep.
135 spearman = list(coef=function(t) pPareto(t,c(13.2333,3.67989,0.857562,0)), icoef=function(t) qPareto(t,c(13.2333,3.67989,0.857562,0)), bounds=c(0,1)), #only positive dep.
136 lower = -Inf, upper = Inf,
137 id="Frank"
138 )
139 output[names(list(...))] <- list(...)
140 output
141 }
142
143 genJoe <- function(...) {
144 output <- list(
145 parameters = c(3),
146 gen = function(t, pars) -log(1-(1-t)^pars[1]),
147 gen.der = function(t, pars) -pars[1]*(1-t)^(pars[1]-1)/(1-(1-t)^pars[1]),
148 gen.der2 = function(t, pars) pars[1]*(pars[1]-1+(1-t)^pars[1])*(1-t)^(pars[1]-2)/(1-(1-t)^pars[1])^2,
149 gen.inv = function(t, pars) 1-(1-exp(-t))^(1/pars[1]),
150 gen.inv.der = function(t, pars) (1 - exp(-t))^(1/pars[1])/(pars[1]*(1 - exp(t))),
151 gen.inv.der2 = function(t, pars) -(1-exp(-t))^(1/pars[1])*(1-pars[1]*exp(t))/(pars[1]*(1-exp(t)))^2,
152 kendall = list(coef=function(t) pPareto(t,c(1.901055,1.015722,1.038055,1)), icoef=function(t) qPareto(t,c(1.901055,1.015722,1.038055,1)), bounds=c(0,1)), #only positive dep.
153 spearman = list(coef=function(t) pPareto(t,c(2.42955,1.99806,1.02288,1)), icoef=function(t) qPareto(t,c(2.42955,1.99806,1.02288,1)), bounds=c(0,1)), #only positive dep.
154 lower = 1, #Pi, g(t)=-ln(t)
155 upper = Inf, #M
156 id="Joe"
157 )
158 output[names(list(...))] <- list(...)
159 output
160 }
161
162 genAMH <- function(...) {
163 output <- list(
164 parameters = c(0.1),
165 gen = function(t, pars) log((1-(1-t)*pars[1])/t),
166 gen.der = function(t, pars) (pars[1]-1)/(t*(1-(1-t)*pars[1])),
167 gen.der2 = function(t, pars) (pars[1]-1)*(1+pars[1]*(2*t-1))/(t*(1-(1-t)*pars[1]))^2,
168 gen.inv = function(t, pars) (1-pars[1])/(exp(t)-pars[1]),
169 gen.inv.der = function(t, pars) -exp(t)*(1-pars[1])/(exp(t)-pars[1])^2,
170 gen.inv.der2 = function(t, pars) exp(t)*(exp(t)+pars[1])*(1-pars[1])/(exp(t)-pars[1])^3,
171 kendall = list(coef=function(t) (3*t-2)/(3*t)-(2*(1-t)^2*log(1-t))/(3*t^2), icoef=NULL, bounds=c(-0.1817258,1/3)),
172 spearman = list(coef=function(t) 0.641086*(-1 + 1.71131^t), icoef=function(t) log(t/0.641086+1,1.71131), bounds=c(-0.271065,0.478418)),
173 lower = -1, #par=0 => Pi
174 upper = 0.9999,
175 id="AMH"
176 )
177 output[names(list(...))] <- list(...)
178 output
179 }
180
181 generator <- function(name,...) {
182 switch(name[1],
183 "AMH"=genAMH(...),
184 "Clayton"=genClayton(...),
185 "Frank"=genFrank(...),
186 "Gumbel"=genGumbel(...),
187 "Joe"=genJoe(...),
188 "log"=genLog(...),
189 ({warning("Generator not recognized. Defaults to genLog().", immediate.=T);
190 genLog(...)})
191 )
192 }
193
194 #upper bound of all Pickands' dependence functions
195 dep1 <- function(...) {
196 output <- list(
197 parameters = NULL,
198 dep = function(t,pars) 1,
199 dep.der = function(t,pars) 0,
200 dep.der2 = function(t,pars) 0,
201 lower = NULL,
202 upper = NULL,
203 id="1"
204 )
205 output[names(list(...))] <- list(...)
206 output
207 }
208
209 #lower bound of all Pickands' dependence functions
210 depMax <- function(power=10,...) {
211 output <- list(
212 parameters = NULL,
213 dep = function(t,pars=NULL) (sum(t^power) + (1 - sum(t))^power)^(1/power),
214 dep.der = function(t,pars=NULL) (t^(power-1)-(1-t)^(power-1))*(t^power + (1 - t)^power)^(1/power-1),
215 dep.der2 = function(t,pars=NULL) (power-1)*((1-t)*t)^(power-2)*(t^power + (1 - t)^power)^(1/power-2),
216 lower = NULL,
217 upper = NULL,
218 id="max"
219 )
220 output[names(list(...))] <- list(...)
221 output
222 }
223
224 #depMax <- list(parameters = NULL,
225 # dep = function(t,pars) max(t,1-sum(t)),
226 # lower = NULL,
227 # upper = NULL
228 #)
229
230 depGumbel <- function(...) {
231 output <- list(
232 parameters = c(2),
233 pcopula = function(t, pars) exp(-sum((-log(t))^pars[1])^(1/pars)),
234 dcopula = NULL,
235 rcopula = NULL,
236 dep = function(t,pars) (sum(t^pars[1]) + (1 - sum(t))^pars[1])^(1/pars[1]),
237 dep.der = function(t,pars) (t^(pars[1]-1)-(1-t)^(pars[1]-1)) * (t^pars[1]+(1-t)^pars[1])^(1/pars[1]-1),
238 dep.der2 = function(t,pars) (pars[1]-1)*(-(t-1)*t)^(pars[1]-2)*(t^pars[1] + (1 - t)^pars[1])^(1/pars[1]-2),
239 kendall = list(coef=function(t) 1-1/t, icoef=function(t) 1/(1-t), bounds=c(0,1)),
240 spearman = list(coef=function(t) pPareto(t,c(1.41917,2.14723,1,1)), icoef=function(t) qPareto(t,c(1.41917,2.14723,1,1)), bounds=c(0,1)),
241 lower = c(1), #A=1, upper bound of all Pickands' dependence functions
242 upper = Inf, #A=max(t1,t2,...,1-t1-t2-...), lower bound of all Pickands' dependence functions
243 id="Gumbel"
244 )
245 output[names(list(...))] <- list(...)
246 output
247 }
248
249 #so far only 2D
250 depGalambos <- function(...) {
251 output <- list(
252 parameters = c(0.5),
253 dep = function(t,pars) {
254 #check for boundary arguments of A (prevents NAN)
255 if(any(
256 c(sapply(1:(length(t)+1),function(x) sum(c(t,1-sum(t))[-x]) > 1 )),
257 pars[1] < 1e-5
258 )
259 ) return(1)
260 1 - (sum(t^-pars[1]) + (1 - sum(t))^-pars[1])^(-1/pars[1])
261 },
262 dep.der = function(t,pars) if(pars[1] < 1e-5) return(0) else ((1-t)^(-1-pars[1])-t^(-1-pars[1]))/((1-t)^-pars[1]+t^-pars[1])^(1+1/pars[1]),
263 dep.der2 = function(t,pars) {
264 if(pars[1] < 1e-5) return(0)
265 if(abs(pars[1]-1) < 1e-5) return(2)
266 ((1+pars[1])*(-(-1+t)*t)^(-2+pars[1])) * ((1-t)^-pars[1]+t^-pars[1])^(-1/pars[1])/((1-t)^pars[1]+t^pars[1])^2
267 },
268 kendall = list(coef=function(t) pPareto(t,c(0.579021,0.382682,0.48448,0)), icoef=function(t) qPareto(t,c(0.579021,0.382682,0.48448,0)), bounds=c(0,1)),
269 spearman = list(coef=function(t) pPareto(t,c(0.876443,0.484209,0.307277,0)), icoef=function(t) qPareto(t,c(0.876443,0.484209,0.307277,0)), bounds=c(0,1)),
270 lower = c(0), #A=1
271 upper = c(10), #A=max(t1,t2,...,1-t1-t2-...)
272 id="Galambos"
273 )
274 output[names(list(...))] <- list(...)
275 output
276 }
277
278
279 #asymmetric logistic Picands' dependence function (Tawn, J. A. (1988). Bivariate extreme value theory: models and estimation)
280 #the first is the dependence parameter, all other are the asymmetry parameters (which when equal, results in symmetric logistic depfu)
281 depTawn <- function(dim=2,...) {
282 parameters <- c(2,rep.int(0.5,dim))
283 dep <- function(t,pars) {
284 1 - pars[-1]%*%c(t,1-sum(t)) + sum((pars[-1]*c(t,1-sum(t)))^pars[1])^(1/pars[1])
285 }
286 dep.der <- function(t,pars) {
287 pars[3] - pars[2] + (1/pars[1])*((pars[2]*t)^pars[1]+(pars[3]*(1-t))^pars[1])^(1/pars[1]-1)*
288 (pars[2]^pars[1]*pars[1]*t^(pars[1]-1) - pars[3]^pars[1]*pars[1]*(1-t)^(pars[1]-1))
289 }
290 dep.der2 <- function(t,pars) {
291 (pars[2]*pars[3])^pars[1] * (pars[1]-1) * ((1-t)*t)^(pars[1]-2) * ((pars[2]*t)^pars[1]+(pars[3]*(1-t))^pars[1])^(1/pars[1]-2)
292 }
293 lower <- c(1,rep.int(0,dim)) #A=1, indep.
294 upper <- c(Inf,rep.int(1,dim)) #A=max(t1,t2,...,1-t1-t2-...), perf.pos.dep
295 output <- list(
296 parameters=parameters,dep=dep,
297 dep.der=if(dim==2) dep.der else NULL, dep.der2=if(dim==2) dep.der2 else NULL,
298 lower=lower,upper=upper,
299 id="Tawn"
300 )
301 output[names(list(...))] <- list(...)
302 output
303 }
304
305
306 #2D only
307 depHuslerReiss <- function(...) {
308 output <- list(
309 parameters = c(0.5),
310 dep = function(t,pars) {
311 if(pars[1]==0) return(1)
312 t*pnorm(1/pars[1] + pars[1]/2*log(t/(1-t))) + (1-t)*pnorm(1/pars[1] - pars[1]/2*log(t/(1-t)))
313 },
314 dep.der = function(t,pars) {
315 if(pars[1]==0) return(0)
316 pnorm(1/pars[1]+pars[1]/2*log(t/(1-t))) + pnorm(-1/pars[1]+pars[1]/2*log(t/(1-t))) - 1
317 },
318 dep.der2 = function(t,pars) {
319 if(is.finite(t) && (t <= 0 || t >= 1)) return(0)
320 exp(-(4+pars[1]^4*log(t/(1-t))^2)/(8*pars[1]^2)) * pars[1] * sqrt(t/(2*pi*(1-t))) / (2*(1-t)*t^2)
321 },
322 kendall = list(coef=function(t) pPareto(t,c(0.799473,0.25111,0.298499,0)), icoef=function(t) qPareto(t,c(0.799473,0.25111,0.298499,0)), bounds=c(0,1)),
323 spearman = list(coef=function(t) pPareto(t,c(0.877543,0.485643,0.307806,0)), icoef=function(t) qPareto(t,c(0.876443,0.484209,0.307277,0)), bounds=c(0,1)),
324 lower = c(0), #A=1, indep.
325 upper = Inf, #A=max(t,1-t), perf.pos.dep
326 id="Husler-Reiss"
327 )
328 output[names(list(...))] <- list(...)
329 output
330 }
331
332 #general convex combination of dependence functions
333 #this function creates object (list) to be used in construction of archimax copula
334 #allows to include also parameters of constituting dependence functions (lined up before the combination parameters)
335 #the dimension of random vector need to be specified
336 #combination parameters are ordered variable-wise (i.e. first dim parameters are related to the first dependence function)
337 #with symmetry=TRUE the function depCC is called.
338 #requires: vpartition()
339 depGCC <- function(depfun=list(dep1(),depGumbel()),
340 dparameters=lapply(depfun,function(x) rep(list(NULL),max(1,length(x$parameters)))),
341 dim=2,symmetry=FALSE) {
342 if(symmetry) return(depCC(depfun=depfun,dparameters=dparameters,dim=dim))
343 dparameters <- lapply(dparameters,unlist)
344 nd <- length(depfun)
345 A <- lapply(depfun,function(x) x$dep) #extract dependence functions
346 Ad <- lapply(depfun,function(x) x$dep.der)
347 Add <- lapply(depfun,function(x) x$dep.der2)
348 np <- mapply(function(dep,par) {if(is.null(par)) length(dep$parameters) else 0}, depfun, dparameters) #count depfus' parameters that are to be estimated
349 iD <- vpartition(1:sum(np),np); iC <- 1:(nd*dim)+sum(np) #indices of the depfus'(as list) and the combination parameters (as vector)
350 dep <- function(t, pars) {
351 pD <- mapply(function(static,idynamic) c(static,pars[idynamic]), dparameters, iD, SIMPLIFY=F,USE.NAMES=F) #extract depfus' pars
352 pC <- combpars(pars); i0 <- pC[["i0"]]; pC <- pC[["par"]] #treat zero rows/cols, normalize (colsums=1), extract indices of depfus kept
353 tpars <- matrix(c(t,1-sum(t)),byrow=T,nrow=nrow(pC[i0, ,drop=F]),ncol=dim) * pC[i0, ,drop=F] #t * pars (normalized and trimed)
354 i00 <- as.logical(apply(tpars,1,sum)) # treat the zero rows occurence
355 sum(mapply(
356 function(PiDF,par,arg) sum(arg)*PiDF(arg[-dim]/sum(arg),par),
357 A[i0][i00], #remove those depfus and their parameters, for which the combination parameters (and also the multiplication with arguments) were zero (zero rows)
358 pD[i0][i00],
359 lapply(apply(tpars[i00, ,drop=F],1,list),unlist) #remove zero rows, isolate the remaining rows
360 ))
361 }
362 dep.der <- function(t, pars) {
363 pD <- mapply(function(static,idynamic) c(static,pars[idynamic]), dparameters, iD, SIMPLIFY=F,USE.NAMES=F) #extract depfus' pars
364 pC <- combpars(pars); i0 <- pC[["i0"]]; pC <- pC[["par"]][i0, ,drop=F] #treat zero rows/cols, normalize (colsums=1), extract indices of depfus kept
365 tpars <- matrix(c(t,1-sum(t)),byrow=T,nrow=nrow(pC),ncol=dim)*pC
366 i00 <- as.logical(apply(tpars,1,sum)) # treat the zero rows occurence
367 sum(mapply(
368 function(PiDF,PiDFd,par,arg,pc) {(pc[1]-pc[2])*PiDF(arg[1]/sum(arg),par) + pc[1]*pc[2]/sum(arg)*PiDFd(arg[1]/sum(arg),par)},
369 A[i0][i00], #remove those depfus and their parameters, for which the combination parameters (and also the multiplication with arguments) were zero (zero rows)
370 Ad[i0][i00],
371 pD[i0][i00],
372 lapply(apply(tpars[i00, ,drop=F],1,list),unlist), #remove zero rows, isolate the remaining rows
373 lapply(apply(pC[i00, ,drop=F],1,list),unlist)
374 ))
375 }
376 dep.der2 <- function(t, pars) {
377 pD <- mapply(function(static,idynamic) c(static,pars[idynamic]), dparameters, iD, SIMPLIFY=F,USE.NAMES=F) #extract depfus' pars
378 pC <- combpars(pars); i0 <- pC[["i0"]]; pC <- pC[["par"]][i0, ,drop=F] #treat zero rows/cols, normalize (colsums=1), extract indices of depfus kept
379 tpars <- matrix(c(t,1-sum(t)),byrow=T,nrow=nrow(pC),ncol=dim)*pC
380 i00 <- as.logical(apply(tpars,1,sum)) # treat the zero rows occurence
381 sum(mapply(
382 function(PiDFdd,par,arg,pc) (pc[1]*pc[2])^2/sum(arg)^3*PiDFdd(arg[1]/sum(arg),par),
383 Add[i0][i00],#remove those depfus and their parameters, for which the combination parameters (and also the multiplication with arguments) were zero (zero rows)
384 pD[i0][i00],
385 lapply(apply(tpars[i00, ,drop=F],1,list),unlist), #remove zero rows, isolate the remaining rows
386 lapply(apply(pC[i00, ,drop=F],1,list),unlist)
387 ))
388 }
389 combpars <- function(pars) { #true combination parameters piled to matrix (#rows = #dep.functions) and indicators of nonzero rows(i0)/columns(j0)
390 pC <- matrix(pars[iC],ncol=dim,nrow=nd,byrow=T) #extract combination parameters, pile them to matrix {p_ji} i=1...dim j=1...length(dep)
391 if(all(pC==0)) pC <- pC + 1 # if all comb.parameters are 0, change them to 1
392 i0 <- as.logical(apply(pC,1,sum)); # find non-zero rows
393 j0 <- as.logical(apply(pC,2,sum)); pC[i0,!j0] <- 1 # find and fill zero columns with a non-zero constant (e.g. 1) = treat zeros as equal weights
394 pCn <- apply(pC,2,function(x) x/sum(x)); dim(pCn) <- dim(pC) #normalize the parameters so that column sums = 1, prevent reducing dimension
395 list(par=pCn, i0=i0, j0=j0)
396 }
397 rescalepars <- function(pars,names=TRUE) {
398 idep <- 0:(min(iC)-1)
399 result <- c(dep=pars[idep],comb=t(combpars(pars)$par))
400 if(!isTRUE(names)) names(result) <- NULL
401 result
402 }
403 stpar <- function(lo,up) {
404 lo[is.infinite(lo)] <- pmin(up-1,-11)[is.infinite(lo)]
405 up[is.infinite(up)] <- pmax(lo+1, 11)[is.infinite(up)]
406 (lo+up)/2
407 }
408 lower <- c(unlist(mapply(function(x,y) x$lower[0:y], depfun, np)),rep.int(0,nd*dim))
409 upper <- c(unlist(mapply(function(x,y) x$upper[0:y], depfun, np)),rep.int(1,nd*dim))
410 parameters <- stpar(lower,upper)
411 list(
412 parameters=parameters,dep=dep,
413 dep.der=if(dim==2) dep.der else NULL, dep.der2=if(dim==2) dep.der2 else NULL,
414 lower=lower,upper=upper,combpars=combpars,rescalepars=rescalepars,id="gcc"
415 )
416 }
417
418 #convex sum of dependence functions
419 #this function creates object (list) to be used in construction of archimax copula
420 #allows to include also parameters of constituting dependence functions (lined up before the combination parameters)
421 #the dimension of random vector need to be specified
422 #requires: vpartition()
423 depCC <- function(depfun=list(dep1(),depGumbel()),dparameters=lapply(depfun,function(x) rep(list(NULL),max(1,length(x$parameters)))),dim=2) {
424 dparameters <- lapply(dparameters,unlist)
425 nd <- length(depfun)
426 A <- lapply(depfun,function(x) x$dep) #extract dependence functions
427 Ad <- lapply(depfun,function(x) x$dep.der)
428 Add <- lapply(depfun,function(x) x$dep.der2)
429 np <- mapply(function(dep,par) {if(is.null(par)) length(dep$parameters) else 0}, depfun, dparameters) #count depfus' parameters that are to be estimated
430 iD <- vpartition(1:sum(np),np); iC <- 1:(nd)+sum(np) #indices of the depfus'(as list) and the combination parameters (as vector)
431 dep <- function(t, pars) {
432 pD <- mapply(function(static,idynamic) c(static,pars[idynamic]), dparameters, iD, SIMPLIFY=F,USE.NAMES=F) #extract depfus' pars
433 pC <- combpars(pars); i0 <- pC[["i0"]]; pC <- pC[["par"]] #extract, treat zero vector, normalize (sum=1), get indices of nonzero values
434 sum(mapply(function(PiDF,par) PiDF(t,par), A[i0], pD[i0]) * pC[i0])
435 }
436 dep.der <- function(t, pars) {
437 pD <- mapply(function(static,idynamic) c(static,pars[idynamic]), dparameters, iD, SIMPLIFY=F,USE.NAMES=F) #extract depfus' pars
438 pC <- combpars(pars); i0 <- pC[["i0"]]; pC <- pC[["par"]] #extract, treat zero vector, normalize (sum=1), get indices of nonzero values
439 sum(mapply(function(PiDFd,par) PiDFd(t,par), Ad[i0], pD[i0]) * pC[i0])
440 }
441 dep.der2 <- function(t, pars) {
442 pD <- mapply(function(static,idynamic) c(static,pars[idynamic]), dparameters, iD, SIMPLIFY=F,USE.NAMES=F) #extract depfus' pars
443 pC <- combpars(pars); i0 <- pC[["i0"]]; pC <- pC[["par"]] #extract, treat zero vector, normalize (sum=1), get indices of nonzero values
444 sum(mapply(function(PiDFdd,par) PiDFdd(t,par), Add[i0], pD[i0]) * pC[i0])
445 }
446 combpars <- function(pars) { #true combination parameters
447 pC <- pars[iC] #extract combination parameters
448 if(all(pC==0)) pC <- pC + 1 # if all comb.parameters are 0, change them to 1
449 i0 <- (pC > 0) # find non-zero (and non-negative) values
450 list(par=pC/sum(pC),i0=i0)
451 }
452 rescalepars <- function(pars,names=TRUE) {
453 idep <- 0:(min(iC)-1)
454 result <- c(dep=pars[idep],comb=t(combpars(pars)$par))
455 if(!isTRUE(names)) names(result) <- NULL
456 result
457 }
458 stpar <- function(lo,up) {
459 lo[is.infinite(lo)] <- pmin(up-1,-11)[is.infinite(lo)]
460 up[is.infinite(up)] <- pmax(lo+1, 11)[is.infinite(up)]
461 (lo+up)/2
462 }
463 lower <- c(unlist(mapply(function(x,y) x$lower[0:y], depfun, np)),rep.int(0,nd))
464 upper <- c(unlist(mapply(function(x,y) x$upper[0:y], depfun, np)),rep.int(1,nd))
465 parameters <- stpar(lower,upper)
466 list(
467 parameters=parameters,dep=dep,
468 dep.der=if(dim==2) dep.der else NULL, dep.der2=if(dim==2) dep.der2 else NULL,
469 lower=lower,upper=upper,combpars=combpars,rescalepars=rescalepars,id="cc"
470 )
471 }
472
473 #return dependece function list or (unnamed) list of dependence functions list
474 depfun <- function(name,...) {
475 ldep <- lapply(
476 name,
477 function(x) switch(x,
478 "1"=dep1(...),
479 "Galambos"=depGalambos(...),
480 "Gumbel"=depGumbel(...),
481 "Husler-Reiss"=depHuslerReiss(...),
482 "max"=depMax(...),
483 "Tawn"=depTawn(...),
484 "gcc"=depGCC(...),
485 "cc"=depCC(...),
486 ({warning("Dependence function not recognized. Defaults to dep1().", immediate.=T);
487 dep1()})
488 )
489 )
490 if(length(ldep)==1) ldep[[1]] else ldep
491 }
492
493 #is there a bug when apply(somematrix,1,function(x) x/sum(x)) ??
494
495 #lapply/mapply are not able to return correct list of functions (contains only copies of the last function)
496 #e.g. mapply(function(a,b) c(function(c) a*b+c), list(1,2,3),list(10,100,1000))
497
498 #list of 5 dependence functions corresponding to all possible partitions of vector {1,2,3}, where 3 is number of dimensions
499 ldepPartition3D <- function(power=8) {
500 dmax <- if(is.infinite(power)) {function(t) max(t)} else {function(t) sum(t^power)^(1/power)}
501 mapply(
502 function(x,y) list(parameters=NULL,dep=x,lower=NULL,upper=NULL,id=y),
503 list(
504 function(t,pars) 1, #P={{1},{2},{3}}
505 function(t,pars) dmax(c(t,1-sum(t))), #P={{1,2,3}}
506 function(t,pars) dmax(c(t[-1],1-sum(t)))+t[1], #P={{1},{2,3}}
507 function(t,pars) dmax(c(t[-2],1-sum(t)))+t[2], #P={{2},{1,3}}
508 function(t,pars) dmax(t)+1-sum(t) #P={{1,2},{3}}
509 ),
510 list("1","max","max+1","max+2","max+3"),
511 SIMPLIFY=FALSE
512 )
513 }
514
515 copProduct <- function(...) {
516 output <- list(
517 parameters = NULL,
518 pcopula = function(t, pars) prod(t),
519 dcopula = function(t, pars) 1,
520 rcopula = function(dim, pars) runif(dim),
521 lower = NULL,
522 upper = NULL,
523 id="product"
524 )
525 output[names(list(...))] <- list(...)
526 output
527 }
528
529 copGumbel <- function(...) {
530 output <- list(
531 parameters = c(4),
532 pcopula = function(t, pars) exp(-sum((-log(t))^pars[1])^(1/pars[1])),
533 kendall = list(coef=function(t) 1-1/t, icoef=function(t) 1/(1-t), bounds=c(0,1)),
534 spearman = list(coef=function(t) pPareto(t,c(1.41917,2.14723,1,1)), icoef=function(t) qPareto(t,c(1.41917,2.14723,1,1)), bounds=c(0,1)),
535 lower = 1, #Pi, g(t)=-ln(t)
536 upper = Inf, #M
537 id="Gumbel"
538 )
539 output[names(list(...))] <- list(...)
540 output
541 }
542
543 copFGM <- function(...) {
544 output<- list(
545 parameters = c(0.5), #if 0 then product copula Pi
546 pcopula = function(t, pars) prod(t)*(pars[1]*prod(1-t)+1),
547 dcopula = function(t, pars) 1 + pars[1]*(prod(2*t-1)),
548 kendall = list(coef=function(t) 2*t/9, icoef=function(t) 9*t/2, bounds=c(-2/9,2/9)),
549 spearman = list(coef=function(t) t/3, icoef=function(t) 3*t, bounds=c(-1/3,1/3)),
550 lower = -1, #
551 upper = 1, #
552 id="FGM"
553 )
554 output[names(list(...))] <- list(...)
555 output
556 }
557
558 #only 2D
559 copPlackett <- function(...) {
560 output <- list(
561 parameters = c(2), #if 1 then product copula Pi
562 pcopula = function(t, pars) {
563 if(abs(pars[1]-1) < 0.00001) prod(t)
564 else {
565 a <- 1+(pars[1]-1)*sum(t)
566 (a-sqrt(a^2-4*prod(t)*pars[1]*(pars[1]-1)))/(2*(pars[1]-1))
567 }
568 },
569 dcopula = function(t, pars) {
570 if(abs(pars[1]-1) < 0.00001) 1
571 else pars[1]*(1+(sum(t)-2*prod(t))*(pars[1]-1))/((1+(pars[1]-1)*sum(t))^2-4*prod(t)*pars[1]*(pars[1]-1))^(3/2)
572 },
573 rcopula = function(n, pars) {
574 u <- runif(n); v <- runif(n);
575 if(abs(pars[1]-1) < 0.00001) return(cbind(u,v))
576 a <- v*(1-v)
577 b <- sqrt(pars[1]*(pars[1]+4*a*u*(1-u)*(1-pars[1])^2))
578 cbind(u,(2*a*(u*pars[1]^2+1-u)+pars[1]*(1-2*a)-(1-2*v)*b)/(2*pars[1]+2*a*(pars[1]-1)^2))
579 },
580 kendall = list(coef=function(t) pPareto(t,c(3.24135,0.538913,1.21742,1)), icoef=function(t) qPareto(t,c(3.24135,0.538913,1.21742,1)), bounds=c(0,1)),
581 spearman = list(coef=function(t) (t^2-1-2*t*log(t))/(t-1)^2, icoef=NULL, bounds=c(-1,1)),
582 lower = 1e-05, #
583 upper = Inf, #
584 id="Plackett"
585 )
586 output[names(list(...))] <- list(...)
587 output
588 }
589
590 copNormal <- function(dim=2,...) {
591 parvec2matrix <- function(parvec) {
592 if(dim != (1+sqrt(8*length(parvec)+1))/2) stop("Dimension does not correspond to number of parameters")
593 m <- matrix(0,dim,dim)
594 m[lower.tri(m)] <- parvec #fill the lower triangle (without diagonal) with parameters
595 m+t(m)+diag(1,dim) #mirror the lower triangle and add 1's
596 }
597 if(require(mvtnorm)) {
598 pcopula <- function(t,pars) pmvnorm(lower=-Inf, upper=sapply(t,qnorm), corr=parvec2matrix(pars))[1]
599 rcopula <- function(n,pars) pnorm(rmvnorm(n=n,sigma=parvec2matrix(pars)))
600 }
601 else {
602 pcopula <- NULL
603 rcopula <- function(n,pars) t(pnorm(t(chol(parvec2matrix(pars)))%*%replicate(n,rnorm(dim))))
604 }
605 npar <- factorial(dim)/factorial(dim-2)/2 #variacia bez opakovania / 2
606 output <- list(
607 parameters = rep.int(0.5,npar), #if 0 then product copula Pi
608 pcopula = pcopula,
609 dcopula = function(t, pars) {
610 sig <- parvec2matrix(pars)
611 v <- sapply(t, qnorm)
612 exp(-0.5*t(v)%*%(solve(sig)-diag(1,length(t)))%*%v)/sqrt(det(sig))
613 },
614 rcopula = rcopula,
615 kendall = list(coef=function(t) 2*asin(t)/pi, icoef=function(t) sin(t*pi/2), bounds=c(-1,1)),
616 spearman = list(coef=function(t) 6*asin(t/2)/pi, icoef=function(t) 2*sin(t*pi/6), bounds=c(-1,1)),
617 lower = rep.int(-0.999,npar), #W (by limit)
618 upper = rep.int(0.999,npar), #M (by limit)
619 id="normal"
620 )
621 output[names(list(...))] <- list(...)
622 output
623 }
624
625 copula <- function(name,...) {
626 switch(name[1],
627 "FGM"=copFGM(...),
628 "Gumbel"=copGumbel(...),
629 "normal"=copNormal(...),
630 "Plackett"=copPlackett(...),
631 "product"=copProduct(...),
632 ({warning("Copula not recognized. Defaults to genLog().", immediate.=T);
633 copProduct(...)})
634 )
635 }
636
637 #cumulative distribution function (or probability dis.fun., that's why "p")
638 pCopula <- function(data, generator=genGumbel(), depfun=dep1(), copula=NULL,
639 gpars=generator$parameters, dpars=depfun$parameters, pars=if(is.null(copula)) list(gpars,dpars) else copula$parameters,
640 subdivisions=50,
641 quantile=NULL,probability=data[,quantile]) {
642 data <- rbind(data, deparse.level=0 ) #ensure data is matrix/data.frame
643 data[data>1] <- 1; data[data<0] <- 0 #crop data to [0,1]
644 names(data) <- NULL
645 dim <- ncol(data)
646 eps <- .Machine$double.eps^0.5
647 # --- cdf definition ---
648 #Archimax copula
649 if(is.null(copula)) {
650 if(!is.null(generator$pcopula) && depfun$id=="1") fun <- function(t) generator$pcopula(t,pars[[1]])
651 else if(!is.null(depfun$pcopula) && generator$id=="log") fun <- function(t) depfun$pcopula(t,pars[[2]])
652 else {
653 g <- function(t) generator$gen(t, pars[[1]]) #generator
654 gi <- function(t) generator$gen.inv(t, pars[[1]])
655 A <- function(t) depfun$dep(t, pars[[2]]) #Pickands depedence function
656 Aeq1 <- (abs(A(rep.int(1,dim-1)/dim)-1) < eps)
657 fun <- function(t) {
658 names(t) <- NULL
659 #t <- pmin(pmax(t,0),1)
660 #if( Aeq1 ) return(gi(sum(sapply(t,g)))) #reduce to archimedean
661 if( abs(prod(t)-0) < eps ) return(0) #0 is annihilator
662 if( abs(prod(t)-1) < eps ) return(1) #C(1,...,1)=1
663 gen <- sapply(t,g)
664 #if(any(!is.finite(gen))) browser()
665 arg <- gen/(sum(gen)+eps) #prevent an accidental overflow caused by rounding at the last decimal place
666 #if(!is.finite(A(arg[-dim]))) browser()
667 if(any(arg > 1-eps, Aeq1)) cdf <- gi( sum(gen) ) #reduce to archimedean
668 else cdf <- gi( sum(gen) * A(arg[-dim]) )
669 #if(any(t > 1)) browser()
670 if(!is.finite(cdf)) {
671 #print(c(t=t,gpar=pars[[1]],dpar=pars[[2]],generator=gen,A=A(gen[-dim]/sum(gen)),cdf=cdf))
672 return(0)
673 }
674 cdf
675 }
676 }
677 }
678 #arbitrary copula
679 else {
680 pars <- unlist(pars) #ensure pars is a vector instead of a list
681 if(!is.null(copula$pcopula)) {
682 fun <- function(t) {
683 if( abs(prod(t)-0) < eps ) return(0) #0 is annihilator
684 cdf <- copula$pcopula(t,pars)
685 cdf
686 }
687 }
688 else {
689 pdf <- function(t) copula$dcopula(t,pars)
690 if(!is.finite(pdf(lower <- rep.int(0,dim)))) lower <- 1e-4
691 fun <- function(t) nintegrate(pdf,lower=lower,upper=t,subdivisions=subdivisions)
692 }
693 }
694 # --- evaluation ---
695 # quantile (if asked for)
696 if(!is.null(quantile)) { #to improve: treat explicitely if only copula density is available
697 qua <- quantile[1]
698 if(quantile > dim) stop("quantile index > copula dimension")
699 data[,qua] <- rep(probability,length.out=nrow(data))
700 if(any(data[,qua] > apply(data[,-qua,drop=F],1,min))) stop("probability > data")
701 qcop <- function(v) { #v contains probability in place of the wanted quantile (qua-th element)
702 fun1 <- function(t) {p <- v[qua]; v[qua] <- t; fun(v) - p}
703 uniroot(fun1, interval=c(0,1))$root
704 }
705 apply(data,1,qcop)
706 }
707 #copula value (by default)
708 else apply(data,1,fun)
709 }
710
711 # copula density, the function checks for closed form availability
712 dCopula <- function(data,generator=genGumbel(),depfun=dep1(),copula=NULL,
713 gpars=generator$parameters, dpars=depfun$parameters,
714 pars=if(is.null(copula)) list(gpars,dpars) else copula$parameters,
715 difference=1e-4,area=c(0),shrinkdiff=FALSE) {
716 data <- rbind(data, deparse.level=0 ) #ensure data is matrix/data.frame
717 # --- Archimax copula ---
718 if(is.null(copula)) {
719 if(!is.null(generator$dcopula) && depfun$id=="1") fun <- function(t) generator$dcopula(t,pars[[1]])
720 else if(!is.null(depfun$dcopula) && generator$id=="log") fun <- function(t) depfun$dcopula(t,pars[[2]])
721 else if(ncol(data)==2) {
722 g <- function(t) generator$gen(t, pars[[1]])
723 gd <- function(t) generator$gen.der(t, pars[[1]])
724 gid <- function(t) generator$gen.inv.der(t, pars[[1]])
725 gidd <- function(t) generator$gen.inv.der2(t, pars[[1]])
726 A <- function(t) depfun$dep(t, pars[[2]])
727 Ad <- function(t) depfun$dep.der(t, pars[[2]])
728 Add <- function(t) depfun$dep.der2(t, pars[[2]])
729 if( abs(A(0.5)-1) < 1e-5 ) fun <- function(t) gd(t[1])*gidd(g(t[1])+g(t[2]))*gd(t[2])
730 else
731 fun <- function(t) {
732 g1 <- g(t[1]); g2 <- g(t[2]); arg <- g1/(g1+g2)
733 gd(t[1]) * (
734 gidd((g1+g2)*A(arg))*(A(arg)-arg*Ad(arg))*(A(arg)+(1-arg)*Ad(arg)) - arg*(1-arg)*gid((g1+g2)*A(arg))*Add(arg)/(g1+g2)
735 ) * gd(t[2])
736 }
737 }
738 else {
739 cdf <- function(t) pCopula(rbind(t),generator=generator,depfun=depfun,pars=pars,gpars=gpars,dpars=dpars)
740 if((k=min(1-max(data),min(data))) <= difference/(abs(area)+1) && shrinkdiff) {
741 difference=k*9/10 #treat leaking of numeric derivative over [0,1]
742 warning("dCopula: Difference in nderive decreased due to [0,1] overflow.",call.=FALSE)
743 }
744 fun <- function(t) nderive(cdf,point=t,order=rep.int(1,length(t)),difference=difference,area=area)
745 }
746 }
747 # --- arbitrary copula ---
748 else {
749 pars <- unlist(pars)
750 if(!is.null(copula$dcopula)) fun <- function(t) copula$dcopula(t,pars) #take explicit formula for density if it exists
751 else {
752 cdf <- function(t) copula$pcopula(t,pars)
753 if((min(1-max(data),min(data))) <= difference/(abs(area)+1)) {
754 cdf <- function(t) copula$pcopula(pmin.int(pmax.int(t,0),1),pars) #collapse surroundings to boundary (instead of shrinking difference) #test and use above if good
755 }
756 fun <- function(t) nderive(cdf,point=t,order=rep.int(1,length(t)),difference=difference,area=area)
757 }
758 }
759 # --- evaluation ---
760 apply(data,1,fun)
761 }
762
763 cCopula <- function(data, conditional.on=c(1), generator=genGumbel(), depfun=dep1(), copula=NULL,
764 gpars=generator$parameters, dpars=depfun$parameters, pars=if(is.null(copula)) list(gpars,dpars) else copula$parameters,
765 difference=1e-4,area=c(0),
766 quantile=NULL,probability=data[,quantile]) {
767 data <- rbind(data, deparse.level=0 ) #ensure data is matrix/data.frame
768 dim <- ncol(data)
769 if(max(conditional.on)>dim) stop("conditional.on > ncol(data)")
770 con <- conditional.on
771 eps <- .Machine$double.eps^0.5
772 # --- Archimax copula ---
773 if(is.null(copula)) {
774 if(dim==2) {
775 g <- function(t) generator$gen(t, pars[[1]])
776 gd <- function(t) generator$gen.der(t, pars[[1]])
777 gid <- function(t) generator$gen.inv.der(t, pars[[1]])
778 A <- function(t) depfun$dep(t, pars[[2]])
779 Ad <- function(t) depfun$dep.der(t, pars[[2]])
780 ccop <- function(v) { #t is unknown quantile, v is the rest of arguments (with probability)
781 if( min(abs(v)) < eps ) return(0) #0 is annihilator, prevent unboudedness of strict generator
782 g1 <- g(v[1]); g2 <- g(v[2]); arg <- g1/(g1+g2)
783 gid((g1+g2)*A(arg)) * gd((2-con)*v[1]+(con-1)*v[2]) * (A(arg)+Ad(arg)*(con-1-arg))
784 }
785 }
786 else {
787 cdf <- function(t) pCopula(t,generator=generator,depfun=depfun,pars=pars,gpars=gpars,dpars=dpars)
788 if((min(1-max(data),min(data))) <= difference/(abs(area)+1))
789 cdf <- function(t) pCopula(pmin.int(pmax.int(t,0),1),generator=generator,depfun=depfun,pars=pars,gpars=gpars,dpars=dpars) #collapse surroundings to boundary (instead of shrinking difference) #test and use above if good
790 ord <- rep.int(0,dim); ord[con] <- 1 #1 marks variable w.r.t. which the function will be differentiated
791 dcop <- function(v) nderive(cdf,point=v,order=ord,difference=difference,area=area)
792 ccop <- function(v) dcop(v)/local({v[setdiff(1:dim,con)] <- 1; dcop(v)})
793 }
794 }
795 # --- arbitrary copula ---
796 else { #to tidy up: if arbitrary copula will not contain dim=2 special case, join the 2<dimensional ccop-s with Archimax case
797 pars <- unlist(pars)
798 cdf <- function(t) copula$pcopula(t,pars)
799 if((min(1-max(data),min(data))) <= difference/(abs(area)+1)) {
800 cdf <- function(t) copula$pcopula(pmin.int(pmax.int(t,0),1),pars) #collapse surroundings to boundary (instead of shrinking difference) #test and use above if good
801 }
802 ord <- rep.int(0,dim); ord[con] <- 1 #1 marks variable w.r.t. which the function will be differentiated
803 dcop <- function(v) nderive(cdf,point=v,order=ord,difference=difference,area=area)
804 ccop <- function(v) dcop(v)/local({v[setdiff(1:dim,con)] <- 1; dcop(v)})
805 }
806 # --- evaluation ---
807 if(!is.null(quantile)) {
808 qua <- quantile
809 if(qua %in% con) stop("quantile must NOT be an element of conditional.on")
810 if(quantile > dim) stop("quantile index > copula dimension")
811 data[,qua] <- rep(probability,length.out=nrow(data))
812 qcop <- function(v) { #v contains probability in place of the wanted quantile (qua-th element)
813 fun <- function(t) {p <- v[qua]; v[qua] <- t; ccop(v) - p}
814 uniroot(fun, interval=c(0,1))$root
815 }
816 apply(data,1,qcop)
817 }
818 else apply(data,1,ccop)
819 }
820
821 #quantile of a copula (un/conditional) distribution function
822 qCopula <- function(data, quantile=1, probability=0.95, conditional.on=NULL,
823 generator=genGumbel(), depfun=dep1(), copula=NULL,
824 gpars=generator$parameters, dpars=depfun$parameters, pars=if(is.null(copula)) list(gpars,dpars) else copula$parameters,
825 difference=1e-4, area=c(0)) {
826 data <- rbind(data, deparse.level=0 ) #ensure data is matrix/data.frame
827 dim <- ncol(data) + 1
828 qua <- quantile
829 data <- cbind(data[,0:(qua-1)],probability,data[,if(qua==dim) 0 else qua:(dim-1)],deparse.level=0)
830 if(is.null(conditional.on)) pCopula(data=data,quantile=quantile,generator=generator,depfun=depfun,copula=copula,pars=pars,gpars=gpars,dpars=dpars)
831 else cCopula(data=data,conditional.on=conditional.on,quantile=quantile,generator=generator,depfun=depfun,copula=copula,pars=pars,gpars=gpars,dpars=dpars,difference=difference,area=area)
832 }
833
834 # - check the correctness of LS-nls, while LS-optim(nlminb) completely fails;
835 # - allow to enter initial values for optimisation routines!!
836 # - do not use 'fnscale' in 'control' of 'optim'
837 # - to improve: print summary
838
839 eCopula <- function(data, generator=genGumbel(), depfun=dep1(), copula=NULL,
840 glimits=list(generator$lower,generator$upper),
841 dlimits=list(depfun$lower,depfun$upper),
842 limits=list(copula$lower,copula$upper),
843 ggridparameters=if(!is.null(unlist(glimits))) do.call(function(...) mapply(c,...,length.out=pgrid,SIMPLIFY=FALSE), glimits) else NULL,
844 dgridparameters=if(!is.null(unlist(dlimits))) do.call(function(...) mapply(c,...,length.out=pgrid,SIMPLIFY=FALSE), dlimits) else NULL,
845 gridparameters=if(!is.null(unlist(limits))) do.call(function(...) mapply(c,...,length.out=pgrid,SIMPLIFY=FALSE), limits) else NULL,
846 technique=c("ML","LS","icorr"), procedure=c("optim","nlminb","nls","grid"), method="default", corrtype=c("kendall","spearman"),
847 control=NULL, pgrid=10) {
848 if(is.null(copula)) {
849 eCopulaArchimax(data=data,generator=generator,depfun=depfun,glimits=glimits,dlimits=dlimits,ggridparameters=ggridparameters,dgridparameters=dgridparameters,technique=technique,procedure=procedure,method=method,control=control,pgrid=pgrid,corrtype=corrtype)
850 }
851 else {
852 eCopulaGeneric(data=data,copula=copula,limits=limits,gridparameters=gridparameters,technique=technique,procedure=procedure,method=method,control=control,pgrid=pgrid,corrtype=corrtype)
853 }
854 }
855
856 ## fitting archimax
857 eCopulaArchimax <- function(data, generator, depfun=dep1(),
858 glimits=list(generator$lower,generator$upper),
859 dlimits=list(depfun$lower,depfun$upper),
860 ggridparameters=if(!is.null(unlist(glimits))) do.call(function(...) mapply(c,...,length.out=pgrid,SIMPLIFY=FALSE), glimits) else NULL,
861 dgridparameters=if(!is.null(unlist(dlimits))) do.call(function(...) mapply(c,...,length.out=pgrid,SIMPLIFY=FALSE), dlimits) else NULL,
862 technique=c("ML","LS","icorr"), procedure=c("optim","nlminb","nls","grid"), method="default", corrtype=c("kendall","spearman"),
863 control=NULL, pgrid=10) {
864 #initial (local) variables
865 npg <- length(generator$parameters); npd <- length(depfun$parameters) #number of gen and dep parameters
866 ig <- min(1,npg):npg; id <- {if(npd < 1) 0 else (1:npd)+npg} #indices of generator and depfun parameters
867 ind <- apply(data==1,1,prod) + apply(data==0,1,prod) # indices for removing unit and zero rows in data
868 if(technique[1]=="LS") empC <- pCopulaEmpirical(data)[!ind]
869 data <- data[!ind,]
870 splitad <- function(pars) list(gpars=pars[ig],dpars=pars[id]) #separate gen/dep parameters
871 #temporarily simple procedure inserted to provide basic functionality of "icorr" technique
872 if(technique[1]=="icorr") {
873 if(npg+npd>1) stop("Technique icorr can currently estimate only 1-parameter copula families.")
874 if(depfun$id=="1") copula <- generator
875 else copula <- depfun
876 if(is.null(corrlist <- copula[[corrtype[1]]])) stop("No relation of correlation coefficient to copula parameter defined.")
877 coef <- cor(data,method=corrtype)[1,2]
878 if((coef > corrlist$bounds[2]) || (coef < corrlist$bounds[1])) stop("Dependence measure out of bounds")
879 if(is.null(corrlist$icoef)) {
880 lim <- unlist(c(glimits,dlimits)); lim <- replace(lim,lim==Inf,1000); lim <- replace(lim,lim==-Inf,-1000)
881 parestim <- uniroot(function(t) corrlist$coef(t)-coef,interval=lim)$root
882 }
883 else parestim <- corrlist$icoef(coef)
884 output <- list(parameters=splitad(parestim),approach=c(technique[1]),fvalue=NULL,procedure.output=NULL)
885 class(output) <- "eCopulaArchimax"
886 return(output)
887 }
888 #switch to fitting technique
889 switch(technique[1],
890 ML={
891 fun <- function(pars) {
892 pars <- comboe(pars)
893 #truncate by "almost zero" to prevent negative values occured when numerical derivatives are used
894 copuladensity <- pmax(dCopula(data, generator=generator, depfun=depfun, pars=list(pars[ig],pars[id])), exp(-50))
895 sum(log(copuladensity))
896 }
897 fscale <- -1
898 },
899 LS={
900 fun <- function(pars) {
901 pars <- comboe(pars)
902 sum((pCopula(data, generator=generator, depfun=depfun, pars=list(pars[ig],pars[id])) - empC)^2)
903 }
904 fscale <- +1
905 funNLS <- function(u,pars) {
906 pars <- comboe(pars)
907 pCopula(u, generator=generator, depfun=depfun, pars=list(pars[ig],pars[id]))
908 }
909 }
910 )
911 #decision tree of grid estimation option
912 if(procedure[1]=="grid") {
913 #preparing parameters
914 makeseq <- function(x) { #make sequence if any argument for seq() is recognized
915 if(sum(match(names(x),c("from","to","by","length.out","along.with"),nomatch =0)) > 0) {
916 x <- replace(x,x==Inf,100); x <- replace(x,x==-Inf,-100)
917 return(unique(do.call(seq,as.list(x))))
918 }
919 else return(x)
920 }
921 ggridparameters <- lapply(ggridparameters,makeseq); dgridparameters <- lapply(dgridparameters,makeseq)
922 ggridparameters <- mapply(function(x,y,z) x[x>=y & x<=z], ggridparameters, generator$lower, generator$upper, SIMPLIFY=FALSE)
923 dgridparameters <- mapply(function(x,y,z) x[x>=y & x<=z], dgridparameters, depfun$lower, depfun$upper, SIMPLIFY=FALSE)
924 parsgrid <- as.matrix(expand.grid(c(ggridparameters,dgridparameters), KEEP.OUT.ATTRS = FALSE))
925 dimnames(parsgrid) <- NULL #prevent apply from returning infinite values
926 comboe <- identity #just for compatibility with (unconditional) optimization procedures
927 #evaluation
928 ind <- order(fvalue <- fscale*apply(parsgrid,1,fun))
929 res <- list(
930 parestim=unlist(parsgrid[ind[1],], use.names = FALSE),
931 fvalue=fscale*fvalue[ind[1]],
932 complete=list(
933 parsgrid=t(parsgrid),
934 fvalue=fscale*fvalue,
935 relfvalues=(max(fvalue)-fvalue)/(max(fvalue)-min(fvalue))
936 )
937 )
938 }
939 else {
940 #immediate exceptions
941 if(technique[1]=="ML" && procedure[1]=="nls" ) stop("ML and nls cannot be combined")
942 st <- c(generator$parameters, depfun$parameters) #starting values (to improve: should be optional)
943 lo <- c(glimits[[1]], dlimits[[1]]); up <- c(glimits[[2]], dlimits[[2]])
944 if( npg+npd != length(lo) || npg+npd != length(up)) stop("differing number of parameters")
945 ind <- st < lo; st[ind] <- lo[ind]
946 ipo <- (up - lo) <= 0; ipog <- ipo[ig]; ipod <- ipo[id] #T/F index of parameters omited in estimation
947 npe <- sum(!ipo) #number of parameters that will be estimated
948 ste <- st[!ipo]; loe <- lo[!ipo]; upe <- up[!ipo] #starting and bounding values of estimated parameters
949 comboe <- function(pars) { #combine omited and estimated parameters
950 parst <- numeric(npg+npd); parst[ipo] <- lo[ipo]; parst[!ipo] <- pars; parst
951 }
952 #switch to fitting procedure
953 switch(procedure[1],
954 optim={
955 res <- optim( par=st[!ipo], fn=fun, lower=lo[!ipo], upper=up[!ipo],
956 method=ifelse(method=="default","L-BFGS-B",method), control=c(list(fnscale=fscale,factr=1e12),control)
957 )
958 res <- list(parestim=res$par,fvalue=res$value,procedure.output=res)
959 },
960 nlminb={
961 fun1 <- function(pars) fscale*fun(pars) #check the "port" help to circumvent this line
962 res <- nlminb( start=st[!ipo], objective=fun1, lower=lo[!ipo], upper=up[!ipo],control=c(list(),control));
963 res <- list(parestim=res$par,fvalue=fscale*res$objective,procedure.output=res)
964 },
965 nls={
966 nlsmethod <- ifelse(method=="default","port",method)
967 if(ncol(data)!=3) stop("nls currently available only for 3D") #tip for improvement: handle formula separately in advance
968 emp <- as.data.frame(data); colnames(emp) <- c('u1','u2','u3'); emp <-cbind(empC,emp);
969 res <- switch(npe,
970 nls(C ~ funNLS(c(u1, u2,u3), c(par1)),
971 data = data,
972 start = list(par1 = ste[1]),
973 lower = list(par1 = loe[1]),
974 upper = list(par1 = upe[1]),
975 algorithm = nlsmethod
976 ),
977 nls(C ~ funNLS(c(u1, u2,u3), c(par1,par2)),
978 data = emp,
979 start = list(par1 = ste[1],par2 = ste[2]),
980 lower = list(par1 = loe[1],par2 = loe[2]),
981 upper = list(par1 = upe[1],par2 = upe[2]),
982 algorithm = nlsmethod
983 ),
984 nls(C ~ funNLS(c(u1, u2,u3), c(par1,par2,par3)),
985 data = emp,
986 start = list(par1 = ste[1],par2 = ste[2],par3 = ste[3]),
987 lower = list(par1 = loe[1],par2 = loe[2],par3 = loe[3]),
988 upper = list(par1 = upe[1],par2 = upe[2],par3 = upe[3]),
989 algorithm = nlsmethod
990 ),
991 nls(C ~ funNLS(c(u1, u2,u3), c(par1,par2,par3,par4)),
992 data = emp,
993 start = list(par1 = ste[1],par2 = ste[2],par3 = ste[3],par4 = ste[4]),
994 lower = list(par1 = loe[1],par2 = loe[2],par3 = loe[3],par4 = loe[4]),
995 upper = list(par1 = upe[1],par2 = upe[2],par3 = upe[3],par4 = upe[4]),
996 algorithm = nlsmethod
997 ),
998 nls(C ~ funNLS(c(u1, u2,u3), c(par1,par2,par3,par4,par5)),
999 data = emp,
1000 start = list(par1 = ste[1],par2 = ste[2],par3 = ste[3],par4 = ste[4],par5 = ste[5]),
1001 lower = list(par1 = loe[1],par2 = loe[2],par3 = loe[3],par4 = loe[4],par5 = loe[5]),
1002 upper = list(par1 = upe[1],par2 = upe[2],par3 = upe[3],par4 = upe[4],par5 = upe[5]),
1003 algorithm = nlsmethod
1004 )
1005 )
1006 res <- list(parestim=as.vector(coef(res)),fvalue=sum(residuals(res)^2),procedure.output=res)
1007 }
1008 )
1009 }
1010 parestim <- splitad(comboe(res$parestim))
1011 if("rescalepars" %in% names(depfun)) parestim$dpars <- depfun$rescalepars(parestim$dpars) # rescale to true parameters of depfun (if appliable)
1012 output <- list(parameters=parestim,approach=c(technique[1],procedure[1],method[1]),fvalue=res$fvalue,procedure.output=res)
1013 class(output) <- "eCopulaArchimax"
1014 output
1015 }
1016
1017 ## fitting generic copula
1018 eCopulaGeneric <- function(data, copula=copGumbel(),
1019 limits=list(copula$lower,copula$upper),
1020 gridparameters=if(!is.null(unlist(limits))) do.call(function(...) mapply(c,...,length.out=pgrid,SIMPLIFY=FALSE), limits) else NULL,
1021 technique=c("ML","LS","icorr"), procedure=c("optim","nlminb","grid"), method="default", corrtype=c("kendall","spearman"),
1022 control=NULL, pgrid=10) {
1023 #initial (local) variables
1024 np <- length(copula$parameters) #number copula parameters
1025 ind <- apply(data==1,1,prod) + apply(data==0,1,prod) # indices for removing unit and zero rows in data
1026 if(technique[1]=="LS") empC <- pCopulaEmpirical(data)[!ind]
1027 data <- data[!ind,]
1028 #temporarily simple procedure inserted to provide basic functionality of "icorr" technique
1029 if(technique[1]=="icorr") {
1030 if(np>1) stop("Technique icorr can currently estimate only 1-parameter copula families.")
1031 if(is.null(corrlist <- copula[[corrtype[1]]])) stop("No relation of correlation coefficient to copula parameter defined.")
1032 coef <- cor(data,method=corrtype)[1,2]
1033 if((coef > corrlist$bounds[2]) || (coef < corrlist$bounds[1])) stop("Dependence measure out of bounds")
1034 if(is.null(corrlist$icoef)) {
1035 lim <- unlist(limits); lim <- replace(lim,lim==Inf,1000); lim <- replace(lim,lim==-Inf,-1000)
1036 parestim <- uniroot(function(t) corrlist$coef(t)-coef,interval=lim)$root
1037 }
1038 else parestim <- corrlist$icoef(coef)
1039 output <- list(parameters=parestim,approach=c(technique[1]),fvalue=NULL,procedure.output=NULL)
1040 class(output) <- "eCopulaGeneric"
1041 return(output)
1042 }
1043 #switch to fitting technique
1044 switch(technique[1],
1045 ML={
1046 fun <- function(pars) {
1047 pars <- comboe(pars)
1048 #truncate by "almost zero" to prevent negative values occured when numerical derivatives are used
1049 copuladensity <- pmax(dCopula(data, copula=copula, pars=pars), exp(-50))
1050 sum(log(copuladensity))
1051 }
1052 fscale <- -1
1053
1054 },
1055 LS={
1056 fun <- function(pars) {
1057 pars <- comboe(pars)
1058 sum((pCopula(data, copula=copula, pars=pars) - empC)^2)
1059 }
1060 fscale <- +1
1061 }
1062 )
1063 #decision tree of grid estimation option
1064 if(procedure[1]=="grid") {
1065 #preparing parameters
1066 makeseq <- function(x) { #make sequence if any argument for seq() is recognized
1067 if(sum(match(names(x),c("from","to","by","length.out","along.with"),nomatch =0)) > 0) {
1068 x <- replace(x,x==Inf,100); x <- replace(x,x==-Inf,-100)
1069 return(unique(do.call(seq,as.list(x))))
1070 }
1071 else return(x)
1072 }
1073 gridparameters <- lapply(gridparameters,makeseq)
1074 gridparameters <- mapply(function(x,y,z) x[x>=y & x<=z], gridparameters, generator$lower, generator$upper, SIMPLIFY=FALSE)
1075 parsgrid <- as.matrix(expand.grid(gridparameters, KEEP.OUT.ATTRS = FALSE))
1076 dimnames(parsgrid) <- NULL #prevent apply from returning infinite values
1077 comboe <- identity #just for compatibility with (unconditional) optimization procedures
1078 #evaluation
1079 ind <- order(fvalue <- fscale*apply(parsgrid,1,fun))
1080 res <- list(
1081 parestim=unlist(parsgrid[ind[1],], use.names = FALSE),
1082 fvalue=fscale*fvalue[ind[1]],
1083 complete=list(
1084 parsgrid=t(parsgrid),
1085 fvalue=fscale*fvalue,
1086 relfvalues=(max(fvalue)-fvalue)/(max(fvalue)-min(fvalue))
1087 )
1088 )
1089 }
1090 else {
1091 #immediate exceptions
1092 st <- copula$parameters #starting values (to improve: should be optional)
1093 lo <- limits[[1]]; up <- limits[[2]]
1094 if( np != length(lo) || np != length(up)) stop("Differing number of parameters.")
1095 ind <- st < lo; st[ind] <- lo[ind]
1096 ipo <- (up - lo) <= 0 #T/F index of parameters omited in estimation
1097 npe <- sum(!ipo) #number of parameters that will be estimated
1098 ste <- st[!ipo]; loe <- lo[!ipo]; upe <- up[!ipo] #starting and bounding values of estimated parameters
1099 comboe <- function(pars) { #combine omited and estimated parameters
1100 parst <- numeric(np); parst[ipo] <- lo[ipo]; parst[!ipo] <- pars; parst
1101 }
1102 #switch to fitting procedure
1103 switch(procedure[1],
1104 optim={
1105 method <- ifelse(method=="default","L-BFGS-B",method)
1106 if(method %in% c("Nelder-Mead", "BFGS", "CG", "SANN", "Brent")) { #for these optim methods limit the parameters range implicitely
1107 fun <- function(pars) {
1108 parscut <- pmin.int(pmax.int(pars,loe),upe)
1109 if(any(parscut != pars)) hndcp <- (1+sum(abs(pars-parscut)))^fscale else hndcp <- 1 #handicap out-of-bounds parameters
1110 fun(parscut)*hndcp
1111 }
1112 res <- optim( par=ste, fn=fun, method=method, control=c(list(fnscale=fscale,factr=1e12),control) )
1113 }
1114 else { #if "L-BFGS-B" method is used, fun arguments will not surpass the limits during optimization
1115 res <- optim( par=ste, fn=fun, lower=loe, upper=upe, method=method, control=c(list(fnscale=fscale,factr=1e12),control) )
1116 }
1117 res <- list(parestim=res$par,fvalue=res$value,procedure.output=res) #compose output
1118 },
1119 nlminb={
1120 fun1 <- function(pars) fscale*fun(pars) #consult "port" help to circumvent this line
1121 res <- nlminb( start=ste, objective=fun1, lower=loe, upper=upe,control=c(list(),control));
1122 res <- list(parestim=res$par,fvalue=fscale*res$objective,procedure.output=res)
1123 }
1124 )
1125 }
1126 parestim <- comboe(res$parestim)
1127 if("rescalepars" %in% names(copula)) parestim <- copula$rescalepars(parestim) # rescale to true parameters of copula (if appliable)
1128 output <- list(parameters=parestim,approach=c(technique[1],procedure[1],method[1]),fvalue=res$fvalue,procedure.output=res)
1129 class(output) <- "eCopulaGeneric"
1130 output
1131 }
1132
1133
1134 #methods for printing eCopula results list
1135 print.eCopulaArchimax <- function(x,...) {
1136 cat("generator parameters: ", x$parameters$gpars, "\n")
1137 cat(" depfun parameters: ", x$parameters$dpars, "\n")
1138 cat(" ", x$approach[1], "function value: ", x$fvalue, "\n")
1139 cat(" convergence code: ", x$procedure.output$procedure.output$convergence, "\n")
1140 }
1141 print.eCopulaGeneric <- function(x,...) {
1142 cat(" copula parameters: ", x$parameters, "\n")
1143 cat(" ", x$approach[1], "function value: ", x$fvalue, "\n")
1144 cat(" convergence code: ", x$procedure.output$procedure.output$convergence, "\n")
1145 }
1146
1147 #simulation of dim-dimensional random vector from archimax copula
1148 #note the thinned runif range due to nderive 'difference' settings
1149 rCopula <- function(n,dim=2,generator=genGumbel(),depfun=dep1(),copula=NULL,
1150 gpars=generator$parameters, dpars=depfun$parameters, pars=if(is.null(copula)) list(gpars,dpars) else copula$parameters) {
1151 if(dim==2 && is.null(copula)) return( rCopulaArchimax2D(n,generator=generator,depfun=depfun,pars=pars) )
1152 if(!is.null(copula$rcopula)) return( copula$rcopula(n,pars) )
1153 cdf <- function(u) pCopula(u,generator=generator,depfun=depfun,copula=copula,pars=pars)
1154 mcop <- function(u) cdf(rbind(c(u,rep.int(1,dim-length(u)))))
1155 NDdiff <- 1e-4
1156 ccop <- function(v,u) nderive(fun=mcop,point=c(u,v),order=c(rep.int(1,length(u)),0),difference=NDdiff)/nderive(fun=mcop,point=u,order=rep.int(1,length(u)),difference=NDdiff)
1157 qcop <- function(p,u) uniroot(function(t) ccop(t,u) - p, interval=c(0,1))$root
1158 p <- matrix(runif(dim*n,min=0+1.1*NDdiff,max=1-1.1*NDdiff),ncol=dim) #thin the runif range using NDdiff to avoid overflow in nderive
1159 result <- NULL
1160 for(i in 1:n) {
1161 x <- p[i,1]
1162 for(j in 2:dim) x <- c(x, qcop(p[i,j],x))
1163 result <- rbind(result,x,deparse.level=0)
1164 }
1165 result
1166 }
1167
1168 # simulation of 2D copula
1169 rCopulaArchimax2D <- function(n, generator=genLog(), depfun=dep1(),
1170 gpars=generator$parameters, dpars=depfun$parameters, pars=list(gpars,dpars)) {
1171 g <- function(t) generator$gen(t, pars[[1]])
1172 gd <- function(t) generator$gen.der(t, pars[[1]])
1173 gi <- function(t) generator$gen.inv(t, pars[[1]])
1174 A <- function(t) depfun$dep(t, pars[[2]])
1175 Ad <- function(t) depfun$dep.der(t, pars[[2]])
1176 Add <- function(t) depfun$dep.der2(t, pars[[2]])
1177 K <- function(t) t - ifelse(t>0 && t<1, g(t)/gd(t), 0)
1178 Ki <- function(t) uniroot(f = function(x) K(x) - t, interval=c(0,1))$root
1179 H <- function(t) t + ifelse(t>0 && t<1, t*(1-t)*Ad(t)/A(t), 0)
1180 Hi <- function(t) uniroot(f = function(x) H(x) - t, interval=c(0,1))$root
1181 rCA <- function(n) { #simulation from Archimedean copula
1182 s <- runif(n); w <- runif(n)
1183 w <- sapply(w,Ki); gw <- sapply(w,g)
1184 cbind(u=sapply(s*gw,gi),v=sapply((1-s)*gw,gi))
1185 }
1186 Aeq1 <- isTRUE(all.equal(A(0.5),1))
1187 if( Aeq1 ) return( rCA(n) )
1188 p <- function(t) {
1189 A <- A(t); Ad <- Ad(t); Add <- Add(t); D <- Ad/A; Dd <- (Add*A - Ad^2)/A^2
1190 h <- 1 + (1-2*t)*D + t*(1-t)*Dd
1191 t*(1-t)*Add/(h*A)
1192 }
1193 z <- sapply(runif(n),Hi); u <- runif(n)
1194 ind <- (u <= sapply(z,p))
1195 w <- numeric(n); w[ind] <- runif(sum(ind)); w[!ind] <- sapply(runif(n-sum(ind)),Ki)
1196 gA <- sapply(w,g)/sapply(z,A)
1197 cbind(u=sapply(z*gA,gi), v=sapply((1-z)*gA,gi))
1198 }
1199
1200
1201 #Blanket goodness-of-fit test
1202 #require library(parallel) iff ncores > 1 (functionality not appliable on Windows OS)
1203 #Example: gCopula(u123[,1:2],generator=genGumbel,dep=dep1,N=10,nc=1,m=400)
1204 gCopula <- function(data, generator, depfun=dep1(), copula=NULL,
1205 glimits=list(generator$lower,generator$upper),
1206 dlimits=list(depfun$lower,depfun$upper),
1207 limits=list(copula$lower,copula$upper),
1208 etechnique=c("ML","LS","icorr"), eprocedure=c("optim","nlminb","nls"), emethod="default", ecorrtype=c("kendall","spearman"), econtrol=NULL,
1209 N=100, m=nrow(data), ncores=1) {
1210 if(is.list(data)) return(gCopulaEmpirical(data=data,N=N,ncores=ncores))
1211 n <- nrow(data)
1212 dim <- ncol(data)
1213 Ein <- pCopulaEmpirical(data)
1214 fKn <- function(x,y) sum(y <= x)/length(y)
1215 fparest <- function(data) eCopula(data,generator=generator,depfun=depfun,copula=copula,glimits=glimits,dlimits=dlimits,limits=limits,
1216 technique=etechnique,procedure=eprocedure,method=emethod,corrtype=ecorrtype,control=econtrol)
1217 fsimcop <- function(parameters) rCopula(m,dim=dim,generator=generator,depfun=depfun,copula=copula, pars=parameters)
1218 estim <- fparest(data)
1219 #---2D Archimax---
1220 if(ncol(data)==2 && is.null(copula)) {
1221 m <- n
1222 fK <- function(vec,pars) {
1223 tauA <- 0
1224 if(depfun$id!="1") {
1225 integrand <- Vectorize(function(t) t*(1-t)*depfun$dep.der2(t, pars[[2]])/depfun$dep(t, pars[[2]]), vectorize.args="t")
1226 tauA <- integrate(integrand,lower=0,upper=1)$value
1227 }
1228 sapply(vec, function(t) t - ifelse(t>0 && t<1, (1-tauA)*generator$gen(t, pars[[1]])/generator$gen.der(t, pars[[1]]), 0))
1229 }
1230 Sn <- sum((rank(Ein)/n - fK(Ein,estim$parameters))^2)
1231 pb <- txtProgressBar(min=0,max=N,style=3)
1232 loop <- function(k) {
1233 simcop <- fsimcop(estim$parameters)
1234 Ein <- pCopulaEmpirical(simcop)
1235 simcopRescaled <- apply(simcop,2,rank)/(n+1)
1236 parest <- fparest(simcopRescaled)$parameters
1237 setTxtProgressBar(pb, k)
1238 #if(k%%10==0) print(k)
1239 sum((rank(Ein)/n - fK(Ein,parest))^2)
1240 }
1241 }
1242 #---other copulas---
1243 else {
1244 m <- max(m,n)
1245 simcop <- fsimcop(estim$parameters)
1246 Vi <- pCopulaEmpirical(simcop)
1247 Bm <- rank(Vi)/m
1248 Kn <- sapply(Vi,fKn,y=Ein)
1249 Sn <- (n/m)*sum((Kn-Bm)^2)
1250 pb <- txtProgressBar(min=0,max=N,style=3)
1251 loop <- function(k) {
1252 simcop <- fsimcop(estim$parameters)
1253 Ein <- pCopulaEmpirical(simcop)
1254 simcopRescaled <- apply(simcop,2,rank)/(m+1)
1255 parest <- fparest(simcopRescaled)$parameters
1256 simcop <- fsimcop(parest)
1257 Vi <- pCopulaEmpirical(simcop)
1258 Bm <- rank(Vi)/m
1259 Kn <- sapply(Vi,fKn,y=Ein)
1260 setTxtProgressBar(pb, k)
1261 #if(k%%10==0) print(k)
1262 (n/m)*sum((Kn-Bm)^2)
1263 }
1264 }
1265 Snk <- if(ncores > 1) do.call(c, parallel::mclapply(1:N,loop,mc.cores=ncores)) else sapply(1:N,loop)
1266 close(pb)
1267 #cat("\n")
1268 output <- list(
1269 statistic=Sn,
1270 q95=quantile(Snk,0.95,names=F),
1271 p.value=sum(Snk > Sn)/N,
1272 estimate=c(if(is.null(copula)) c(gpars=estim$parameters$gpars,dpars=estim$parameters$dpars) else pars=estim$parameters, fvalue=estim$fvalue),
1273 data.name=deparse(substitute(data)),
1274 method="Blanket GOF test based on Kendall's transform",
1275 fitting_method=as.vector(c(etechnique,eprocedure,emethod)),
1276 copula_id=if(is.null(copula)) c(generator=generator$id,depfun=depfun$id) else copula$id
1277 )
1278 class(output) <- "gCopula"
1279 output
1280 }
1281
1282 gCopulaEmpirical <- function(data,N=100,ncores=1) {
1283 data1 <- data[[1]]; data2 <- data[[2]]
1284 dim <- ncol(data1); if(dim!=ncol(data2)) stop("Different number of columns.")
1285 n1 <- nrow(data1); n2 <- nrow(data2)
1286 #Cramer-von Mises test statistic
1287 Sn <- local({
1288 split1 <- split(data1,col(data1)); split2 <- split(data2,col(data2))
1289 s1 <- sum(Reduce("*",lapply(split1,function(a) outer(a,a,function(...) 1-pmax(...)))))
1290 s2 <- sum(Reduce("*",mapply(function(a,b) outer(a,b,function(...) 1-pmax(...)),split1,split2,SIMPLIFY=F)))
1291 s3 <- sum(Reduce("*",lapply(split2,function(a) outer(a,a,function(...) 1-pmax(...)))))
1292 1/(1/n1+1/n2)*(s1/n1^2-s2*2/n1/n2+s3/n2^2)
1293 })
1294 #empirical copulas and their approximate derivatives
1295 Cn <- function(x) sum(apply(t(data1) <= x,2,prod))/n1
1296 Dn <- function(x) sum(apply(t(data2) <= x,2,prod))/n2
1297 dCn <- function(x,i,h) {
1298 e <- numeric(dim); e[i] <- h
1299 (Cn(x+e)-Cn(x-e))/2/h
1300 }
1301 dDn <- function(x,i,h) {
1302 e <- numeric(dim); e[i] <- h
1303 (Dn(x+e)-Dn(x-e))/2/h
1304 }
1305 #gaussian process and multiplier technique functions
1306 alpha <- function(x) sum(apply(t(data1) <= x,2,prod)*xi)/sqrt(n1)
1307 gamma <- function(x) sum(apply(t(data2) <= x,2,prod)*zeta)/sqrt(n2)
1308 beta <- function(x,i) sum((data1[,i]<=x)*xi)/sqrt(n1)
1309 delta <- function(x,i) sum((data2[,i]<=x)*zeta)/sqrt(n2)
1310 Ck <- function(x) alpha(x) - sum(sapply(1:dim,function(i) beta(x[i])*dCn(x,i,1/sqrt(n1))))
1311 Dk <- function(x) gamma(x) - sum(sapply(1:dim,function(i) delta(x[i])*dDn(x,i,1/sqrt(n2))))
1312 EPSk <- function(x) (sqrt(n2)*Ck(x)-sqrt(n1)*Dk(x)) #divisor sqrt(n1+n2) moved to integral
1313 #merged data
1314 data12 <- merge(data1,data2,all=T); n12 <- nrow(data12)
1315 #iteration
1316 pb <- txtProgressBar(min=0,max=N,style=3)
1317 xi <- zeta <- numeric(0)
1318 loop <- function(k) {
1319 xii <- rnorm(n1); xi <<- xii - mean(xii)
1320 zetaa <- rnorm(n2); zeta <<- zetaa - mean(zetaa)
1321 setTxtProgressBar(pb, k)
1322 sum(apply(data12,1,EPSk)^2)/(n1+n2)/n12
1323 }
1324 Snk <- if(ncores > 1) do.call(c, parallel::mclapply(1:N,loop,mc.cores=ncores)) else sapply(1:N,loop)
1325 close(pb)
1326 #compose output
1327 output <- list(
1328 statistic=Sn,
1329 q95=quantile(Snk,0.95,names=F),
1330 p.value=sum(Snk > Sn)/N,
1331 estimate=NULL,
1332 data.name=if(is.null(names(data))) deparse(substitute(data)) else names(data),
1333 method="Test of equality between 2 empirical copulas (Remillard & Scaillet 2009)",
1334 fitting_method=NULL,
1335 copula_id=NULL
1336 )
1337 class(output) <- "gCopula"
1338 output
1339 }
1340
1341
1342 #method for printing gCopula results list
1343 print.gCopula <- function(x,...) {
1344 cat("\n\t\t", x$method, "\n\n")
1345 print.default(c(statistic=x$statistic,q95=x$q95,p.value=x$p.value))
1346 cat("-----------------------------\n")
1347 cat("data: ", x$data.name, "\n")
1348 cat("copula: ",x$copula_id,"\n")
1349 cat("estimates:\n")
1350 print.default(x$estimate)
1351 invisible(x)
1352 }
1353
1354 #check d-increasingness, 0 as annihilator and 1 as neutral element for every parameters combination
1355 #Example: isCopula(generator=genJoe,dep=depGalambos,dim=3)
1356 isCopula <- function(generator=genLog(),depfun=dep1(),copula=NULL,
1357 glimits=list(generator$lower,generator$upper),
1358 dlimits=list(depfun$lower,depfun$upper),
1359 limits=list(copula$lower,copula$upper),
1360 ggridparameters=if(!is.null(unlist(glimits))) do.call(function(...) mapply(c,...,length.out=pgrid,SIMPLIFY=FALSE), glimits) else NULL,
1361 dgridparameters=if(!is.null(unlist(dlimits))) do.call(function(...) mapply(c,...,length.out=pgrid,SIMPLIFY=FALSE), dlimits) else NULL,
1362 gridparameters=if(!is.null(unlist(limits))) do.call(function(...) mapply(c,...,length.out=pgrid,SIMPLIFY=FALSE), limits) else NULL,
1363 dagrid=10, pgrid=10, dim=3, tolerance=1e-15) {
1364 #initial (local) variables
1365 npg <- length(generator$parameters); npd <- length(depfun$parameters) #number of gen and dep parameters
1366 ig <- min(1,npg):npg; id <- {if(npd < 1) 0 else (1:npd)+npg} #indices of generator and depfun parameters
1367 splitad <- function(pars) list(gpars=pars[ig],dpars=pars[id]) #func to separate gen/dep parameters (in the end)
1368 #preparing parameters
1369 makeseq <- function(x) { #make sequence if any argument for seq() is recognized
1370 if(sum(match(names(x),c("from","to","by","length.out","along.with"),nomatch =0)) > 0) {
1371 x <- replace(x,x==Inf,32); x <- replace(x,x==-Inf,-32) #replace infinite boundary
1372 return(unique(do.call(seq,as.list(x))))
1373 }
1374 else return(x)
1375 }
1376 gparameters <- lapply(ggridparameters,makeseq) #make sequence
1377 dparameters <- lapply(dgridparameters,makeseq)
1378 parameters <- lapply(gridparameters,makeseq)
1379 gparameters <- mapply(function(x,y,z) x[x>=y & x<=z], gparameters, generator$lower, generator$upper, SIMPLIFY=FALSE) #ensure the parameter sequence is within permitted range
1380 dparameters <- mapply(function(x,y,z) x[x>=y & x<=z], dparameters, depfun$lower, depfun$upper, SIMPLIFY=FALSE)
1381 parameters <- mapply(function(x,y,z) x[x>=y & x<=z], parameters, copula$lower, copula$upper, SIMPLIFY=FALSE)
1382 #preparing data
1383 axes <- mapply(seq.int,rep.int(0,dim),rep.int(1,dim),MoreArgs=list(length.out=dagrid)) #expand grid of all (n=m^d) points
1384 allpoints <- do.call(expand.grid,split(t(axes),1:dim))
1385 #distinguish archimax and generic copula
1386 if(is.null(copula)) {
1387 parameters <- c(gpar=gparameters,dparameters=dparameters)
1388 cdfun <- function(pars) pCopula(allpoints, generator=generator, depfun=depfun, pars=list(pars[ig],pars[id]))
1389 }
1390 else {
1391 names(parameters) <- paste("cpar",1:length(parameters),sep="")
1392 cdfun <- function(pars) pCopula(allpoints, copula=copula, pars=pars)
1393 }
1394 #minimal dim-difference (C-volume) in hypercube data grid
1395 monot <- function(hcdata) {
1396 out <- numeric(dim)
1397 for(i in 1:dim) {
1398 sub1 <- subd <- rep.int(",",dim) #dim equals length(dim(hcdata))
1399 sub1[i] <- -1; subd[i] <- -dim(hcdata)[i]
1400 subcop <- function(sub) eval(parse(text = paste(c("hcdata[", sub, ", drop = FALSE]"),collapse="")))
1401 hcdata <- subcop(sub1) - subcop(subd) #recursive first differences
1402 out[i] <- min(hcdata) #find the smallest of the current i-th differences
1403 }
1404 out
1405 }
1406 annih <- function(hcdata) {
1407 sapply(
1408 1:dim,
1409 function(i) { #e.g. i=3,dim=4 yields hcdata[,,1,]
1410 sub <- rep.int(",",dim) #dim equals length(dim(hcdata))
1411 sub[i] <- 1
1412 max(abs(eval(parse(text = paste(c("hcdata[", sub, "]"),collapse=""))))) #find maximal deviation from 0 #max|C(x1,...xn,0,xm,...) - 0|
1413 }
1414 )
1415 }
1416 neutel <- function(hcdata) {
1417 sapply(
1418 1:dim,
1419 function(i) {
1420 sub <- as.character(dim(hcdata))
1421 sub[i] <- ""
1422 max(abs(eval(parse(text = paste(c("hcdata[", paste(sub,collapse=","), "]"),collapse="")))-axes[,i])) #max|C(1,...1,x,1,...1) - x|
1423 }
1424 )
1425 }
1426 fun <- function(pars) {
1427 coparray <- cdfun(pars)
1428 if(length(coparray)!=dagrid^dim || !all(is.finite(coparray))) stop(paste("Copula with parameter(s): ",paste(pars,collapse=" ")," led to errors.\n")) #test for propper length (nrows) to catch errors in cdf
1429 coparray <- array(coparray,rep.int(dagrid,dim))
1430 c(
1431 monot(coparray),
1432 -annih(coparray), #minus unary operator for evaluation compatibility with one-sided monot criterion
1433 -neutel(coparray)
1434 )
1435 }
1436 parsgrid <- as.matrix(expand.grid(parameters, KEEP.OUT.ATTRS = FALSE)) #all combinations
1437 result <- apply(parsgrid,1,fun); dim(result) <- c(dim,3,ncol(result)) #dim1~copuladimension,dim2~(monot,annih,neutel),dim3~parametersgrid
1438 ind <- which(result < 0 - tolerance, arr.ind=T, useNames=F)
1439 output <- data.frame(
1440 dim=ind[,1], #to which variable the issue is related
1441 property=c("monot","annih","neutel")[ind[,2]], #which copula property is violated
1442 value=result[ind]*ifelse(ind[,2]==1,1,-1), #value of difference or deviation from theoretical value, that exceeds tolerance
1443 parsgrid[ind[,3],,drop=F] #actual parameters of copula related to the issue
1444 )
1445 output <- list(
1446 is.copula = !as.logical(nrow(output)),
1447 issues = output
1448 )
1449 class(output) <- "isCopula"
1450 output
1451 }
1452
1453 #method for printing isCopula results (does not work with output as data.frame; to fix)
1454 print.isCopula <- function(x,...) {
1455 n <- nrow(x$issues)
1456 cat("\n Does the object appears to be a copula(?): ", x$is.copula, "\n")
1457 if(n>0) {
1458 cat("\n Showing", min(n,5), "of", n, "issues: \n\n")
1459 print.data.frame(x$issues[1:min(n,5),])
1460 }
1461 #invisible(x)
1462 }
1463
Attached Files
To refer to attachments on a page, use attachment:filename, as shown below in the list of files. Do NOT use the URL of the [get] link, since this is subject to change and can break easily.You are not allowed to attach a file to this page.