Title: | Bayesian Inference for Multivariate Stochastic Differential Equations |
---|---|
Description: | Implements an MCMC sampler for the posterior distribution of arbitrary time-homogeneous multivariate stochastic differential equation (SDE) models with possibly latent components. The package provides a simple entry point to integrate user-defined models directly with the sampler's C++ code, and parallelizes large portions of the calculations when compiled with 'OpenMP'. |
Authors: | Martin Lysy [aut, cre], Feiyu Zhu [aut], JunYong Tong [aut], Trevor Kitt [ctb], Nigel Delaney [ctb] |
Maintainer: | Martin Lysy <[email protected]> |
License: | GPL-3 |
Version: | 1.0.5 |
Built: | 2024-11-04 03:33:41 UTC |
Source: | https://github.com/mlysy/msde |
Implements an MCMC sampler for the posterior distribution of arbitrary time-homogeneous multivariate stochastic differential equation (SDE) models with possibly latent components. The package provides a simple entry point to integrate user-defined models directly with the sampler's C++ code, and parallelizes large portions of the calculations when compiled with 'OpenMP'.
See package vignettes; vignette("msde-quicktut")
for a tutorial and vignette("msde-exmodels")
for several example models.
Maintainer: Martin Lysy [email protected]
Authors:
Feiyu Zhu
JunYong Tong
Other contributors:
Trevor Kitt [contributor]
Nigel Delaney [contributor]
Useful links:
# Posterior inference for Heston's model # compile model hfile <- sde.examples("hest", file.only = TRUE) param.names <- c("alpha", "gamma", "beta", "sigma", "rho") data.names <- c("X", "Z") hmod <- sde.make.model(ModelFile = hfile, param.names = param.names, data.names = data.names) # or simply load pre-compiled version hmod <- sde.examples("hest") # Simulate data X0 <- c(X = log(1000), Z = 0.1) theta <- c(alpha = 0.1, gamma = 1, beta = 0.8, sigma = 0.6, rho = -0.8) dT <- 1/252 nobs <- 1000 hest.sim <- sde.sim(model = hmod, x0 = X0, theta = theta, dt = dT, dt.sim = dT/10, nobs = nobs) # initialize MCMC sampler # both components observed, no missing data between observations init <- sde.init(model = hmod, x = hest.sim$data, dt = hest.sim$dt, theta = theta) # Initialize posterior sampling argument nsamples <- 1e4 burn <- 1e3 hyper <- NULL # flat prior hest.post <- sde.post(model = hmod, init = init, hyper = hyper, nsamples = nsamples, burn = burn) # plot the histogram for the sampled parameters par(mfrow = c(2,3)) for(ii in 1:length(hmod$param.names)) { hist(hest.post$params[,ii],breaks=100, freq = FALSE, main = parse(text = hmod$param.names[ii]), xlab = "") }
# Posterior inference for Heston's model # compile model hfile <- sde.examples("hest", file.only = TRUE) param.names <- c("alpha", "gamma", "beta", "sigma", "rho") data.names <- c("X", "Z") hmod <- sde.make.model(ModelFile = hfile, param.names = param.names, data.names = data.names) # or simply load pre-compiled version hmod <- sde.examples("hest") # Simulate data X0 <- c(X = log(1000), Z = 0.1) theta <- c(alpha = 0.1, gamma = 1, beta = 0.8, sigma = 0.6, rho = -0.8) dT <- 1/252 nobs <- 1000 hest.sim <- sde.sim(model = hmod, x0 = X0, theta = theta, dt = dT, dt.sim = dT/10, nobs = nobs) # initialize MCMC sampler # both components observed, no missing data between observations init <- sde.init(model = hmod, x = hest.sim$data, dt = hest.sim$dt, theta = theta) # Initialize posterior sampling argument nsamples <- 1e4 burn <- 1e3 hyper <- NULL # flat prior hest.post <- sde.post(model = hmod, init = init, hyper = hyper, nsamples = nsamples, burn = burn) # plot the histogram for the sampled parameters par(mfrow = c(2,3)) for(ii in 1:length(hmod$param.names)) { hist(hest.post$params[,ii],breaks=100, freq = FALSE, main = parse(text = hmod$param.names[ii]), xlab = "") }
Computes the exact Euler loglikelihood for any amount of missing data using a Kalman filter.
mou.loglik(X, dt, nvar.obs, Gamma, Lambda, Phi, mu0, Sigma0)
mou.loglik(X, dt, nvar.obs, Gamma, Lambda, Phi, mu0, Sigma0)
X |
An |
dt |
A scalar or length |
nvar.obs |
A scalar or length |
Gamma |
A |
Lambda |
A length- |
Phi |
A |
mu0 , Sigma0
|
Mean and variance of marginal multivariate normal distribution of |
The -dimensional multivariate Ornstein-Uhlenbeck (mOU) process
satisfies the SDE
where is
-dimensional Brownian motion. Its Euler discretization is of the form
where ,
and
Thus, is multivariate normal Markov chain for which the marginal distribution of any subset of timepoints and/or components can be efficiently calculated using the Kalman filter. This can be used to check the MCMC output of
sde.post()
as in the example.
Scalar value of the loglikelihood. See Details.
# bivariate OU model bmod <- sde.examples("biou") # simulate some data # true parameter values Gamma0 <- .1 * crossprod(matrix(rnorm(4),2,2)) Lambda0 <- rnorm(2) Phi0 <- crossprod(matrix(rnorm(4),2,2)) Psi0 <- chol(Phi0) # precompiled model uses the Cholesky scale theta0 <- c(Gamma0, Lambda0, Psi0[c(1,3,4)]) names(theta0) <- bmod$param.names # initial value Y0 <- rnorm(2) names(Y0) <- bmod$data.names # simulation dT <- runif(1, max = .1) # time step nObs <- 10 bsim <- sde.sim(bmod, x0 = Y0, theta = theta0, dt = dT, dt.sim = dT, nobs = nObs) YObs <- bsim$data # inference via MCMC binit <- sde.init(bmod, x = YObs, dt = dT, theta = theta0, nvar.obs = 1) # second component is unobserved # only Lambda1 is unknown fixed.params <- rep(TRUE, bmod$nparams) names(fixed.params) <- bmod$param.names fixed.params["Lambda1"] <- FALSE # prior on (Lambda1, Y_0) hyper <- list(mu = c(0,0), Sigma = diag(2)) names(hyper$mu) <- bmod$data.names dimnames(hyper$Sigma) <- rep(list(bmod$data.names), 2) # posterior sampling nsamples <- 1e5 burn <- 1e3 bpost <- sde.post(bmod, binit, hyper = hyper, fixed.params = fixed.params, nsamples = nsamples, burn = burn) L1.mcmc <- bpost$params[,"Lambda1"] # analytic posterior L1.seq <- seq(min(L1.mcmc), max(L1.mcmc), len = 500) L1.loglik <- sapply(L1.seq, function(l1) { lambda <- Lambda0 lambda[1] <- l1 mou.loglik(X = YObs, dt = dT, nvar.obs = 1, Gamma = Gamma0, Lambda = lambda, Phi = Phi0, mu0 = hyper$mu, Sigma0 = hyper$Sigma) }) # normalize density L1.Kalman <- exp(L1.loglik - max(L1.loglik)) L1.Kalman <- L1.Kalman/sum(L1.Kalman)/(L1.seq[2]-L1.seq[1]) # compare hist(L1.mcmc, breaks = 100, freq = FALSE, main = expression(p(Lambda[1]*" | "*bold(Y)[1])), xlab = expression(Lambda[1])) lines(L1.seq, L1.Kalman, col = "red") legend("topright", legend = c("Analytic", "MCMC"), pch = c(NA, 22), lty = c(1, NA), col = c("red", "black"))
# bivariate OU model bmod <- sde.examples("biou") # simulate some data # true parameter values Gamma0 <- .1 * crossprod(matrix(rnorm(4),2,2)) Lambda0 <- rnorm(2) Phi0 <- crossprod(matrix(rnorm(4),2,2)) Psi0 <- chol(Phi0) # precompiled model uses the Cholesky scale theta0 <- c(Gamma0, Lambda0, Psi0[c(1,3,4)]) names(theta0) <- bmod$param.names # initial value Y0 <- rnorm(2) names(Y0) <- bmod$data.names # simulation dT <- runif(1, max = .1) # time step nObs <- 10 bsim <- sde.sim(bmod, x0 = Y0, theta = theta0, dt = dT, dt.sim = dT, nobs = nObs) YObs <- bsim$data # inference via MCMC binit <- sde.init(bmod, x = YObs, dt = dT, theta = theta0, nvar.obs = 1) # second component is unobserved # only Lambda1 is unknown fixed.params <- rep(TRUE, bmod$nparams) names(fixed.params) <- bmod$param.names fixed.params["Lambda1"] <- FALSE # prior on (Lambda1, Y_0) hyper <- list(mu = c(0,0), Sigma = diag(2)) names(hyper$mu) <- bmod$data.names dimnames(hyper$Sigma) <- rep(list(bmod$data.names), 2) # posterior sampling nsamples <- 1e5 burn <- 1e3 bpost <- sde.post(bmod, binit, hyper = hyper, fixed.params = fixed.params, nsamples = nsamples, burn = burn) L1.mcmc <- bpost$params[,"Lambda1"] # analytic posterior L1.seq <- seq(min(L1.mcmc), max(L1.mcmc), len = 500) L1.loglik <- sapply(L1.seq, function(l1) { lambda <- Lambda0 lambda[1] <- l1 mou.loglik(X = YObs, dt = dT, nvar.obs = 1, Gamma = Gamma0, Lambda = lambda, Phi = Phi0, mu0 = hyper$mu, Sigma0 = hyper$Sigma) }) # normalize density L1.Kalman <- exp(L1.loglik - max(L1.loglik)) L1.Kalman <- L1.Kalman/sum(L1.Kalman)/(L1.seq[2]-L1.seq[1]) # compare hist(L1.mcmc, breaks = 100, freq = FALSE, main = expression(p(Lambda[1]*" | "*bold(Y)[1])), xlab = expression(Lambda[1])) lines(L1.seq, L1.Kalman, col = "red") legend("topright", legend = c("Analytic", "MCMC"), pch = c(NA, 22), lty = c(1, NA), col = c("red", "black"))
Argument checking for the default multivariate normal prior.
mvn.hyper.check(hyper, param.names, data.names)
mvn.hyper.check(hyper, param.names, data.names)
hyper |
The normal prior's hyperparameters: |
param.names |
Vector of parameter names (see Details). |
data.names |
Vector of data names (see Details). |
This function is not meant to be called directly by the user, but rather to parse the hyper-parameters of a default multivariate normal prior distribution to be passed to the C++ code in sde.prior()
and sde.post()
. This default prior is multivariate normal on the elements of (theta, x0)
specified by each of names(mu)
, rownames(Sigma)
, and colnames(Sigma)
. The remaining components are given Lebesgue priors, or a full Lebesgue prior if hyper == NULL
. If the names of mu
and Sigma
are inconsistent an error is thrown.
A list with the following elements:
mean
The mean vector.
cholSd
The upper upper Cholesky factor of the variance matrix.
thetaId
The index of the corresponding variables in theta
.
xId
The index of the corresponding variables in x0
.
Computes the SDE model's diffusion function given data and parameter values.
sde.diff(model, x, theta)
sde.diff(model, x, theta)
model |
An |
x |
A vector or matrix of data with |
theta |
A vector or matrix of parameters with |
A matrix with ndims^2
columns containing the diffusion function evaluated at x
and theta
. Each row corresponds to the upper triangular Cholesky factor of the diffusion matrix. If either input contains invalid SDE data or parameters an error is thrown.
# load Heston's model hmod <- sde.examples("hest") #' # single input theta <- c(alpha = 0.1, gamma = 1, beta = 0.8, sigma = 0.6, rho = -0.8) x0 <- c(X = log(1000), Z = 0.1) sde.diff(model = hmod, x = x0, theta = theta) #' # multiple inputs nreps <- 10 Theta <- apply(t(replicate(nreps, theta)), 2, jitter) X0 <- apply(t(replicate(nreps, x0)), 2, jitter) sde.diff(model = hmod, x = X0, theta = Theta) #' # mixed inputs sde.diff(model = hmod, x = x0, theta = Theta)
# load Heston's model hmod <- sde.examples("hest") #' # single input theta <- c(alpha = 0.1, gamma = 1, beta = 0.8, sigma = 0.6, rho = -0.8) x0 <- c(X = log(1000), Z = 0.1) sde.diff(model = hmod, x = x0, theta = theta) #' # multiple inputs nreps <- 10 Theta <- apply(t(replicate(nreps, theta)), 2, jitter) X0 <- apply(t(replicate(nreps, x0)), 2, jitter) sde.diff(model = hmod, x = X0, theta = Theta) #' # mixed inputs sde.diff(model = hmod, x = x0, theta = Theta)
Computes the SDE model's drift function given data and parameter values.
sde.drift(model, x, theta)
sde.drift(model, x, theta)
model |
An |
x |
A vector or matrix of data with |
theta |
A vector or matrix of parameters with |
A matrix with ndims
columns containing the drift function evaluated at x
and theta
. If either input contains invalid SDE data or parameters an error is thrown.
# load Heston's model hmod <- sde.examples("hest") # single input x0 <- c(X = log(1000), Z = 0.1) theta <- c(alpha = 0.1, gamma = 1, beta = 0.8, sigma = 0.6, rho = -0.8) sde.drift(model = hmod, x = x0, theta = theta) # multiple inputs nreps <- 10 Theta <- apply(t(replicate(nreps,theta)),2,jitter) X0 <- apply(t(replicate(nreps,x0)),2,jitter) sde.drift(model = hmod, x = X0, theta = Theta)
# load Heston's model hmod <- sde.examples("hest") # single input x0 <- c(X = log(1000), Z = 0.1) theta <- c(alpha = 0.1, gamma = 1, beta = 0.8, sigma = 0.6, rho = -0.8) sde.drift(model = hmod, x = x0, theta = theta) # multiple inputs nreps <- 10 Theta <- apply(t(replicate(nreps,theta)),2,jitter) X0 <- apply(t(replicate(nreps,x0)),2,jitter) sde.drift(model = hmod, x = X0, theta = Theta)
Provides sample C++
code for several SDE models.
sde.examples( model = c("hest", "pgnet", "lotvol", "biou", "eou"), file.only = FALSE )
sde.examples( model = c("hest", "pgnet", "lotvol", "biou", "eou"), file.only = FALSE )
model |
Character string giving the name of a sample model. Possible values are: |
file.only |
If |
All pre-compiled models are with the default prior and with OpenMP
disabled. A full description of the example models can be found in the package vignette; to view it run vignette("msde-exmodels")
.
An sde.model
object, or the path to the C++ model header file.
sde.make.model()
for sde.model
objects, mvn.hyper.check()
for specification of the default prior.
# Heston's model hmod <- sde.examples("hest") # load pre-compiled model # inspect model's C++ code hfile <- sde.examples("hest", file.only = TRUE) cat(readLines(hfile), sep = "\n") ## Not run: # compile it from scratch param.names <- c("alpha", "gamma", "beta", "sigma", "rho") data.names <- c("X", "Z") hmod <- sde.make.model(ModelFile = hfile, param.names = param.names, data.names = data.names) ## End(Not run)
# Heston's model hmod <- sde.examples("hest") # load pre-compiled model # inspect model's C++ code hfile <- sde.examples("hest", file.only = TRUE) cat(readLines(hfile), sep = "\n") ## Not run: # compile it from scratch param.names <- c("alpha", "gamma", "beta", "sigma", "rho") data.names <- c("X", "Z") hmod <- sde.make.model(ModelFile = hfile, param.names = param.names, data.names = data.names) ## End(Not run)
Specifies the observed SDE data, interobservation times, initial parameter and missing data values to be supplied to sde.post()
.
sde.init(model, x, dt, m = 1, nvar.obs, theta)
sde.init(model, x, dt, m = 1, nvar.obs, theta)
model |
An |
x |
An |
dt |
A scalar or length |
m |
Positive integer, such that |
nvar.obs |
A scalar or length |
theta |
A length |
An sde.init
object, corresponding to a list with elements:
data
An ncomp x ndims
matrix of complete data, where ncomp = N_m = m * (nobs-1)+1
.
dt.m
The complete data interobservation time, dt_m = dt/m
.
nvar.obs.m
The number of variables observed per row of data
. Note that nvar.obs.m[(i-1)*m+1] == nvar.obs[ii]
, and that nvar.obs.m[i-1] == 0
if i
is not a multiple of m
.
params
Parameter initial values.
# load Heston's model hmod <- sde.examples("hest") # generate some observed data nObs <- 5 x0 <- c(X = log(1000), Z = 0.1) X0 <- apply(t(replicate(nObs, x0)), 2, jitter) dT <- .6 theta <- c(alpha = 0.1, gamma = 1, beta = 0.8, sigma = 0.6, rho = -0.8) # no missing data sde.init(model = hmod, x = X0, dt = dT, theta = theta) # all but endpoint volatilities are missing sde.init(model = hmod, x = X0, dt = dT, m = 1, nvar.obs = c(2, rep(1, nObs-2), 2), theta = theta) # all volatilities missing, # two completely missing SDE timepoints between observations m <- 3 # divide each observation interval into m equally spaced timepoints sde.init(model = hmod, x = X0, dt = dT, m = m, nvar.obs = 1, theta = theta)
# load Heston's model hmod <- sde.examples("hest") # generate some observed data nObs <- 5 x0 <- c(X = log(1000), Z = 0.1) X0 <- apply(t(replicate(nObs, x0)), 2, jitter) dT <- .6 theta <- c(alpha = 0.1, gamma = 1, beta = 0.8, sigma = 0.6, rho = -0.8) # no missing data sde.init(model = hmod, x = X0, dt = dT, theta = theta) # all but endpoint volatilities are missing sde.init(model = hmod, x = X0, dt = dT, m = 1, nvar.obs = c(2, rep(1, nObs-2), 2), theta = theta) # all volatilities missing, # two completely missing SDE timepoints between observations m <- 3 # divide each observation interval into m equally spaced timepoints sde.init(model = hmod, x = X0, dt = dT, m = m, nvar.obs = 1, theta = theta)
Evaluates the loglikelihood function given SDE data and parameter values.
sde.loglik(model, x, dt, theta, ncores = 1)
sde.loglik(model, x, dt, theta, ncores = 1)
model |
An |
x |
A matrix or 3-d array of data with |
dt |
A scalar or vector of length |
theta |
A vector or matrix of parameters with |
ncores |
If |
A vector of loglikelihood evaluations, of the same length as the third dimension of x
and/or first dimension of theta
. If input contains invalid data or parameters an error is thrown.
# load Heston's model hmod <- sde.examples("hest") # Simulate data nreps <- 10 nobs <- 100 theta <- c(alpha = 0.1, gamma = 1, beta = 0.8, sigma = 0.6, rho = -0.8) Theta <- apply(t(replicate(nreps, theta)), 2, jitter) x0 <- c(X = log(1000), Z = 0.1) X0 <- apply(t(replicate(nreps,x0)), 2, jitter) dT <- 1/252 hsim <- sde.sim(model = hmod, x0 = X0, theta = Theta, dt = dT, dt.sim = dT/10, nobs = nobs, nreps = nreps) # single parameter, single data sde.loglik(model = hmod, x = hsim$data[,,1], dt = dT, theta = theta) # multiple parameters, single data sde.loglik(model = hmod, x = hsim$data[,,1], dt = dT, theta = Theta) # multiple parameters, multiple data sde.loglik(model = hmod, x = hsim$data, dt = dT, theta = Theta)
# load Heston's model hmod <- sde.examples("hest") # Simulate data nreps <- 10 nobs <- 100 theta <- c(alpha = 0.1, gamma = 1, beta = 0.8, sigma = 0.6, rho = -0.8) Theta <- apply(t(replicate(nreps, theta)), 2, jitter) x0 <- c(X = log(1000), Z = 0.1) X0 <- apply(t(replicate(nreps,x0)), 2, jitter) dT <- 1/252 hsim <- sde.sim(model = hmod, x0 = X0, theta = Theta, dt = dT, dt.sim = dT/10, nobs = nobs, nreps = nreps) # single parameter, single data sde.loglik(model = hmod, x = hsim$data[,,1], dt = dT, theta = theta) # multiple parameters, single data sde.loglik(model = hmod, x = hsim$data[,,1], dt = dT, theta = Theta) # multiple parameters, multiple data sde.loglik(model = hmod, x = hsim$data, dt = dT, theta = Theta)
Compiles the C++ code for various SDE-related algorithms and makes the routines available within R.
sde.make.model( ModelFile, PriorFile = "default", data.names, param.names, hyper.check, OpenMP = FALSE, ... )
sde.make.model( ModelFile, PriorFile = "default", data.names, param.names, hyper.check, OpenMP = FALSE, ... )
ModelFile |
Path to the header file where the SDE model is defined. |
PriorFile |
Path to the header file where the SDE prior is defined. See |
data.names |
Vector of names for the SDE components. Defaults to |
param.names |
Vector of names for the SDE parameters. Defaults to |
hyper.check |
A function with arguments |
OpenMP |
Logical; whether the model is compiled with |
... |
additional arguments to |
An sde.model
object, consisting of a list with the following elements:
ptr
Pointer to C++ sde object (sdeRobj
) implementing the member functions: drift/diffusion, data/parameter validators, loglikelihood, prior distribution, forward simulation, MCMC algorithm for Bayesian inference.
ndims
, nparams
The number of SDE components and parameters.
data.names
, param.names
The names of the SDE components and parameters.
omp
A logical flag for whether or not the model was compiled for multicore functionality with OpenMP
.
sde.drift()
, sde.diff()
, sde.valid()
, sde.loglik()
, sde.prior()
, sde.sim()
, sde.post()
.
# header (C++) file for Heston's model hfile <- sde.examples("hest", file.only = TRUE) cat(readLines(hfile), sep = "\n") # compile the model param.names <- c("alpha", "gamma", "beta", "sigma", "rho") data.names <- c("X", "Z") hmod <- sde.make.model(ModelFile = hfile, param.names = param.names, data.names = data.names) hmod
# header (C++) file for Heston's model hfile <- sde.examples("hest", file.only = TRUE) cat(readLines(hfile), sep = "\n") # compile the model param.names <- c("alpha", "gamma", "beta", "sigma", "rho") data.names <- c("X", "Z") hmod <- sde.make.model(ModelFile = hfile, param.names = param.names, data.names = data.names) hmod
A Metropolis-within-Gibbs sampler for the Euler-Maruyama approximation to the true posterior density.
sde.post( model, init, hyper, nsamples, burn, mwg.sd = NULL, adapt = TRUE, loglik.out = FALSE, last.miss.out = FALSE, update.data = TRUE, data.out, update.params = TRUE, fixed.params, ncores = 1, verbose = TRUE )
sde.post( model, init, hyper, nsamples, burn, mwg.sd = NULL, adapt = TRUE, loglik.out = FALSE, last.miss.out = FALSE, update.data = TRUE, data.out, update.params = TRUE, fixed.params, ncores = 1, verbose = TRUE )
model |
An |
init |
An |
hyper |
The hyperparameters of the SDE prior. See |
nsamples |
Number of MCMC iterations. |
burn |
Integer number of burn-in samples, or fraction of |
mwg.sd |
Standard deviation jump size for Metropolis-within-Gibbs on parameters and missing components of first SDE observation (see Details). |
adapt |
Logical or list to specify adaptive Metropolis-within-Gibbs sampling (see Details). |
loglik.out |
Logical, whether to return the loglikelihood at each step. |
last.miss.out |
Logical, whether to return the missing sde components of the last observation. |
update.data |
Logical, whether to update the missing data. |
data.out |
A scalar, integer vector, or list of three integer vectors determining the subset of data to be returned (see Details). |
update.params |
Logical, whether to update the model parameters. |
fixed.params |
Logical vector of length |
ncores |
If |
verbose |
Logical, whether to periodically output MCMC status. |
The Metropolis-within-Gibbs (MWG) jump sizes can be specified as a scalar, a vector or length nparams + ndims
, or a named vector containing the elements defined by sde.init$nvar.obs.m[1]
(the missing variables in the first SDE observation) and fixed.params
(the SDE parameters which are not held fixed). The default jump sizes for each MWG random variable are .25 * |initial_value|
when |initial_value| > 0
, and 1 otherwise.
adapt == TRUE
implements an adaptive MCMC proposal by Roberts and Rosenthal (2009). At step of the MCMC, the jump size of each MWG random variable is increased or decreased by
, depending on whether the cumulative acceptance rate is above or below the optimal value of 0.44. If
is the size of the jump at step
, then the next jump size is determined by
When adapt
is not logical, it is a list with elements max
and rate
, such that delta(n) = min(max, 1/n^rate)
. These elements can be scalars or vectors in the same manner as mwg.sd
.
For SDE models with thousands of latent variables, data.out
can be used to thin the MCMC missing data output. An integer vector or scalar returns specific or evenly-spaced posterior samples from the ncomp x ndims
complete data matrix. A list with elements isamples
, icomp
, and idims
determines which samples, time points, and SDE variables to return. The first of these can be a scalar or vector with the same meaning as before.
A list with elements:
params
An nsamples x nparams
matrix of posterior parameter draws.
data
A 3-d array of posterior missing data draws, for which the output dimensions are specified by data.out
.
init
The sde.init
object which initialized the sampler.
data.out
A list of three integer vectors specifying which timepoints, variables, and MCMC iterations correspond to the values in the data
output.
mwg.sd
A named vector of Metropolis-within-Gibbs standard devations used at the last posterior iteration.
hyper
The hyperparameter specification.
loglik
If loglik.out == TRUE
, the vector of nsamples
complete data loglikelihoods calculated at each posterior sample.
last.iter
A list with elements data
and params
giving the last MCMC sample. Useful for resuming the MCMC from that point.
last.miss
If last.miss.out == TRUE
, an nsamples x nmissN
matrix of all posterior draws for the missing data in the final observation. Useful for SDE forecasting at future timepoints.
accept
A named list of acceptance rates for the various components of the MCMC sampler.
Roberts, G.O. and Rosenthal, J.S. "Examples of adaptive MCMC." Journal of Computational and Graphical Statistics 18.2 (2009): 349-367. http://www.probability.ca/jeff/ftpdir/adaptex.pdf.
# Posterior inference for Heston's model hmod <- sde.examples("hest") # load pre-compiled model # Simulate data X0 <- c(X = log(1000), Z = 0.1) theta <- c(alpha = 0.1, gamma = 1, beta = 0.8, sigma = 0.6, rho = -0.8) dT <- 1/252 nobs <- 1000 hest.sim <- sde.sim(model = hmod, x0 = X0, theta = theta, dt = dT, dt.sim = dT/10, nobs = nobs) # initialize MCMC sampler # both components observed, no missing data between observations init <- sde.init(model = hmod, x = hest.sim$data, dt = hest.sim$dt, theta = theta) # Initialize posterior sampling argument nsamples <- 1e4 burn <- 1e3 hyper <- NULL # flat prior hest.post <- sde.post(model = hmod, init = init, hyper = hyper, nsamples = nsamples, burn = burn) # plot the histogram for the sampled parameters par(mfrow = c(2,3)) for(ii in 1:length(hmod$param.names)) { hist(hest.post$params[,ii],breaks=100, freq = FALSE, main = parse(text = hmod$param.names[ii]), xlab = "") }
# Posterior inference for Heston's model hmod <- sde.examples("hest") # load pre-compiled model # Simulate data X0 <- c(X = log(1000), Z = 0.1) theta <- c(alpha = 0.1, gamma = 1, beta = 0.8, sigma = 0.6, rho = -0.8) dT <- 1/252 nobs <- 1000 hest.sim <- sde.sim(model = hmod, x0 = X0, theta = theta, dt = dT, dt.sim = dT/10, nobs = nobs) # initialize MCMC sampler # both components observed, no missing data between observations init <- sde.init(model = hmod, x = hest.sim$data, dt = hest.sim$dt, theta = theta) # Initialize posterior sampling argument nsamples <- 1e4 burn <- 1e3 hyper <- NULL # flat prior hest.post <- sde.post(model = hmod, init = init, hyper = hyper, nsamples = nsamples, burn = burn) # plot the histogram for the sampled parameters par(mfrow = c(2,3)) for(ii in 1:length(hmod$param.names)) { hist(hest.post$params[,ii],breaks=100, freq = FALSE, main = parse(text = hmod$param.names[ii]), xlab = "") }
Evaluates the SDE prior given data, parameter, and hyperparameter values.
sde.prior(model, theta, x, hyper)
sde.prior(model, theta, x, hyper)
model |
An |
theta |
A vector or matrix of parameters with |
x |
A vector or matrix of data with |
hyper |
The hyperparameters of the SDE prior. See Details. |
The prior is constructed at the C++
level by defining a function (i.e., public member)
double logPrior(double *theta, double *x)
within the sdePrior
class. At the R
level, the hyper.check
argument of sde.make.model()
is a function with arguments hyper
, param.names
, data.names
used to convert hyper
into a list of NULL
or double-vectors which get passed on to the C++
code. This function can also be used to throw R
-level errors to protect the C++
code from invalid inputs, as is done for the default prior in mvn.hyper.check()
. For a full example see the "Custom Prior" section in vignette("msde-quicktut")
.
A vector of log-prior densities evaluated at the inputs.
hmod <- sde.examples("hest") # load Heston's model # setting prior for 3 parameters rv.names <- c("alpha","gamma","rho") mu <- rnorm(3) Sigma <- crossprod(matrix(rnorm(9),3,3)) names(mu) <- rv.names colnames(Sigma) <- rv.names rownames(Sigma) <- rv.names hyper <- list(mu = mu, Sigma = Sigma) # Simulate data nreps <- 10 theta <- c(alpha = 0.1, gamma = 1, beta = 0.8, sigma = 0.6, rho = -0.8) x0 <- c(X = log(1000), Z = 0.1) Theta <- apply(t(replicate(nreps,theta)),2,jitter) X0 <- apply(t(replicate(nreps,x0)),2,jitter) sde.prior(model = hmod, x = X0, theta = Theta, hyper = hyper)
hmod <- sde.examples("hest") # load Heston's model # setting prior for 3 parameters rv.names <- c("alpha","gamma","rho") mu <- rnorm(3) Sigma <- crossprod(matrix(rnorm(9),3,3)) names(mu) <- rv.names colnames(Sigma) <- rv.names rownames(Sigma) <- rv.names hyper <- list(mu = mu, Sigma = Sigma) # Simulate data nreps <- 10 theta <- c(alpha = 0.1, gamma = 1, beta = 0.8, sigma = 0.6, rho = -0.8) x0 <- c(X = log(1000), Z = 0.1) Theta <- apply(t(replicate(nreps,theta)),2,jitter) X0 <- apply(t(replicate(nreps,x0)),2,jitter) sde.prior(model = hmod, x = X0, theta = Theta, hyper = hyper)
Simulates a discretized Euler-Maruyama approximation to the true SDE trajectory.
sde.sim( model, x0, theta, dt, dt.sim, nobs, burn = 0, nreps = 1, max.bad.draws = 5000, verbose = TRUE )
sde.sim( model, x0, theta, dt, dt.sim, nobs, burn = 0, nreps = 1, max.bad.draws = 5000, verbose = TRUE )
model |
An |
x0 |
A vector or a matrix of size |
theta |
A vector or matrix of size |
dt |
Scalar interobservation time. |
dt.sim |
Scalar interobservation time for simulation. That is, interally the interobservation time is |
nobs |
The number of SDE observations per trajectory to generate. |
burn |
Scalar burn-in value. Either an integer giving the number of burn-in steps, or a value between 0 and 1 giving the fraction of burn-in relative to |
nreps |
The number of SDE trajectories to generate. |
max.bad.draws |
The maximum number of times that invalid forward steps are proposed. See Details. |
verbose |
Whether or not to display information on the simulation. |
The simulation algorithm is a Markov process with and
where is the SDE drift function and
is the diffusion function on the variance scale. At each step, a while-loop is used until a valid SDE draw is produced. The simulation algorithm terminates after
nreps
trajectories are drawn or once a total of max.bad.draws
are reached.
A list with elements:
data
An array of size nobs x ndims x nreps
containing the simulated SDE trajectories.
params
The vector or matrix of parameter values used to generate the data.
dt, dt.sim
The actual and internal interobservation times.
nbad
The total number of bad draws.
# load pre-compiled model hmod <- sde.examples("hest") # initial values x0 <- c(X = log(1000), Z = 0.1) theta <- c(alpha = 0.1, gamma = 1, beta = 0.8, sigma = 0.6, rho = -0.8) # simulate data dT <- 1/252 nobs <- 2000 burn <- 500 hsim <- sde.sim(model = hmod, x0 = x0, theta = theta, dt = dT, dt.sim = dT/10, nobs = nobs, burn = burn) par(mfrow = c(1,2)) plot(hsim$data[,"X"], type = "l", xlab = "Time", ylab = "", main = expression(X[t])) plot(hsim$data[,"Z"], type = "l", xlab = "Time", ylab = "", main = expression(Z[t]))
# load pre-compiled model hmod <- sde.examples("hest") # initial values x0 <- c(X = log(1000), Z = 0.1) theta <- c(alpha = 0.1, gamma = 1, beta = 0.8, sigma = 0.6, rho = -0.8) # simulate data dT <- 1/252 nobs <- 2000 burn <- 500 hsim <- sde.sim(model = hmod, x0 = x0, theta = theta, dt = dT, dt.sim = dT/10, nobs = nobs, burn = burn) par(mfrow = c(1,2)) plot(hsim$data[,"X"], type = "l", xlab = "Time", ylab = "", main = expression(X[t])) plot(hsim$data[,"Z"], type = "l", xlab = "Time", ylab = "", main = expression(Z[t]))
Checks whether input SDE data and parameters are valid.
sde.valid.data(model, x, theta) sde.valid.params(model, theta)
sde.valid.data(model, x, theta) sde.valid.params(model, theta)
model |
An |
x |
A length- |
theta |
A length- |
A logical scalar or vector indicating whether the given data/parameter pair is valid.
# Heston's model # valid data is: Z > 0 # valid parameters are: gamma, sigma > 0, |rho| < 1, beta > .5 * sigma^2 hmod <- sde.examples("hest") # load model theta <- c(alpha = 0.1, gamma = 1, beta = 0.8, sigma = 0.6, rho = -0.8) # valid data x0 <- c(X = log(1000), Z = 0.1) sde.valid.data(model = hmod, x = x0, theta = theta) # invalid data x0 <- c(X = log(1000), Z = -0.1) sde.valid.data(model = hmod, x = x0, theta = theta) # valid parameters theta <- c(alpha = 0.1, gamma = 1, beta = 0.8, sigma = 0.6, rho = -0.8) sde.valid.params(model = hmod, theta = theta) # invalid parameters theta <- c(alpha = 0.1, gamma = -4, beta = 0.8, sigma = 0.6, rho = -0.8) sde.valid.params(model = hmod, theta = theta)
# Heston's model # valid data is: Z > 0 # valid parameters are: gamma, sigma > 0, |rho| < 1, beta > .5 * sigma^2 hmod <- sde.examples("hest") # load model theta <- c(alpha = 0.1, gamma = 1, beta = 0.8, sigma = 0.6, rho = -0.8) # valid data x0 <- c(X = log(1000), Z = 0.1) sde.valid.data(model = hmod, x = x0, theta = theta) # invalid data x0 <- c(X = log(1000), Z = -0.1) sde.valid.data(model = hmod, x = x0, theta = theta) # valid parameters theta <- c(alpha = 0.1, gamma = 1, beta = 0.8, sigma = 0.6, rho = -0.8) sde.valid.params(model = hmod, theta = theta) # invalid parameters theta <- c(alpha = 0.1, gamma = -4, beta = 0.8, sigma = 0.6, rho = -0.8) sde.valid.params(model = hmod, theta = theta)