The goal of dirinla is to analyze compositional data with Dirichlet regression, using the integrated nested Laplace approximation (INLA) as implemented in the R-INLA package.

Package documentation can be found at https://inlabru-org.github.io/dirinla/

Installation

dirinla is not yet on CRAN, but you can install the latest bugfix release from GitHub with:

# install.packages("remotes")
remotes::install_github("inlabru-org/dirinla", ref = "master")

The latest development version can be installed via inlabru-org.r-universe.dev:

# Enable universe(s) by inlabru-org
options(repos = c(
  inlabruorg = 'https://inlabru-org.r-universe.dev',
  INLA = 'https://inla.r-inla-download.org/R/testing',
  CRAN = 'https://cloud.r-project.org'))

# Install the package
install.packages('dirinla')

or directly from GitHub:

# install.packages("remotes")
remotes::install_github("inlabru-org/dirinla", ref = "devel")

Example

This is a basic example showing how to simulate compositional data and fit a Dirichlet regression model:

Loading libraries
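
The example below uses the following packages. This is a minimal sketch assuming dirinla and INLA are installed (INLA from its own repository, as in the installation instructions above), and that rdirichlet(), used in the simulation, is available from dirinla or from an additional package such as DirichletReg.

library(dirinla)
library(INLA)
# library(DirichletReg)  # assumed alternative source of rdirichlet(), if needed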

Simulating from a Dirichlet likelihood

set.seed(1000)
N <- 50 # number of observations
V <- as.data.frame(matrix(runif(4 * N, 0, 1), ncol = 4)) # covariates
names(V) <- paste0('v', 1:4)

# One linear predictor per category, separated by '|'
formula <- y ~ 1 + v1 | 1 + v2 | 1 + v3 | 1 + v4
(names_cat <- formula_list(formula))
#> $`category 1`
#> [1] "intercept" "v1"       
#> 
#> $`category 2`
#> [1] "intercept" "v2"       
#> 
#> $`category 3`
#> [1] "intercept" "v3"       
#> 
#> $`category 4`
#> [1] "intercept" "v4"

# True values of the fixed effects: intercept and slope for each category
x <- c(-1.5, 1, -3, 1.5,
       2, -3, -1, 5)

mus <- exp(x) / sum(exp(x))
C <- length(names_cat)
# Stack the design matrix: one row per observation-category combination
A_construct <-
  data_stack_dirich(y = as.vector(rep(NA, N * C)),
                    covariates = names_cat,
                    data       = V,
                    d          = C,
                    n          = N)

A_construct[1:8, ]
#> 8 x 8 sparse Matrix of class "dgCMatrix"
#>                                                     
#> [1,] 1 0.3278787 . .         . .         . .        
#> [2,] . .         1 0.7267993 . .         . .        
#> [3,] . .         . .         1 0.5993679 . .        
#> [4,] . .         . .         . .         1 0.3224284
#> [5,] 1 0.7588465 . .         . .         . .        
#> [6,] . .         1 0.6820559 . .         . .        
#> [7,] . .         . .         1 0.4516818 . .        
#> [8,] . .         . .         . .         1 0.5613199

# Linear predictor and Dirichlet parameters for each observation and category
eta <- A_construct %*% x
alpha <- exp(eta)
alpha <- matrix(alpha,
                ncol  = C,
                byrow = TRUE)
# Simulate the compositional response
y_o <- rdirichlet(N, alpha)
colnames(y_o) <- paste0("y", 1:C)
head(y_o)
#>                y1           y2          y3        y4
#> [1,] 1.139109e-04 2.413110e-01 0.345644051 0.4129310
#> [2,] 7.342592e-02 3.633687e-03 0.193128806 0.7298116
#> [3,] 2.079875e-02 4.007038e-05 0.323599848 0.6555613
#> [4,] 1.361278e-05 3.894290e-02 0.172308411 0.7887351
#> [5,] 2.339991e-02 4.035880e-03 0.055003377 0.9175608
#> [6,] 8.910796e-01 6.743970e-10 0.000663061 0.1082573
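
Since the response is compositional, each simulated observation lies on the simplex; a quick check that every row of y_o sums to one:

# Row sums of a Dirichlet sample should all equal 1 (up to floating-point error)
summary(rowSums(y_o))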

Fitting a simple model

y <- y_o
model.inla <- dirinlareg(
  formula  = y ~ 1 + v1 | 1 + v2 | 1 + v3 | 1 + v4,
  y        = y,
  data.cov = V,
  prec     = 0.0001,
  verbose  = TRUE)
#> 
#>  
#>  ---------------------- Looking for the mode ----------------- 
#>  
#>  Iter = 1, |grad| = 824.56, log.post = -457.73, |x_new - x_old| = 13.92583, |f_new - f_old| = 368.13749
#> Iter = 2, |grad| = 136.74, log.post = -577.55, |x_new - x_old| = 3.65634, |f_new - f_old| = 119.82746
#> Iter = 3, |grad| = 120.98, log.post = -671.77, |x_new - x_old| = 3.31293, |f_new - f_old| = 94.22001
#> Iter = 4, |grad| = 96.8, log.post = -739.43, |x_new - x_old| = 2.81732, |f_new - f_old| = 67.65798
#> Iter = 5, |grad| = 70.76, log.post = -785.42, |x_new - x_old| = 2.1453, |f_new - f_old| = 45.99234
#> Iter = 6, |grad| = 48.13, log.post = -809.29, |x_new - x_old| = 1.3377, |f_new - f_old| = 23.86892
#> Iter = 7, |grad| = 24.92, log.post = -814.96, |x_new - x_old| = 0.55663, |f_new - f_old| = 5.67249
#> Iter = 8, |grad| = 5.97, log.post = -815.24, |x_new - x_old| = 0.10262, |f_new - f_old| = 0.27719
#> Iter = 9, |grad| = 0.3, log.post = -815.24, |x_new - x_old| = 0.00994, |f_new - f_old| = 0.00062
#> Iter = 10, |grad| = 0, log.post = -815.24, |x_new - x_old| = 0.00011, |f_new - f_old| = 0
#> Iter = 11, |grad| = 0, log.post = -815.24, |x_new - x_old| = 0, |f_new - f_old| = 0
#> 
#> Great news! The mode has been properly located!
#>  
#> Real Hessian has been used 10 times 
#> Expected Hessian has been used 40 times 
#> 
#>  ----------------------    INLA call    ----------------- 
#> INLA-Iter = 1, fixed.effects = 8, hyperparameters = 0 ---> PASS
#> 
#>  ---------------------- Obtaining linear predictor -----------------

summary(model.inla)
#> 
#> Call: 
#>  dirinlareg(formula = y ~ 1 + v1 | 1 + v2 | 1 + v3 | 1 + v4, y = y, 
#>     data.cov = V, prec = 1e-04, verbose = TRUE)
#> 
#>  
#> ---- FIXED EFFECTS ---- 
#> ======================================================================= 
#> y1
#> ----------------------------------------------------------------------- 
#>              mean     sd 0.025quant 0.5quant 0.975quant    mode
#> intercept -1.5293 0.2922    -2.1020  -1.5293    -0.9565 -1.5293
#> v1         0.9977 0.5146    -0.0108   0.9977     2.0063  0.9977
#> ======================================================================= 
#> y2
#> ----------------------------------------------------------------------- 
#>              mean     sd 0.025quant 0.5quant 0.975quant    mode
#> intercept -2.8538 0.2783    -3.3993  -2.8538     -2.308 -2.8538
#> v2         0.7187 0.4870    -0.2357   0.7187      1.673  0.7187
#> ======================================================================= 
#> y3
#> ----------------------------------------------------------------------- 
#>             mean     sd 0.025quant 0.5quant 0.975quant   mode
#> intercept  1.902 0.2448      1.422    1.902      2.382  1.902
#> v3        -3.045 0.3738     -3.777   -3.045     -2.312 -3.045
#> ======================================================================= 
#> y4
#> ----------------------------------------------------------------------- 
#>              mean     sd 0.025quant 0.5quant 0.975quant    mode
#> intercept -0.7033 0.3145     -1.320  -0.7033   -0.08698 -0.7033
#> v4         4.5247 0.4313      3.679   4.5247    5.37012  4.5247
#> ======================================================================= 
#> 
#> ---- HYPERPARAMETERS ---- 
#> 
#>  No hyperparameters in the model 
#> ======================================================================= 
#> DIC = 1555.1536 , WAIC = 1039.5914 , LCPO = 779.5894 
#> Number of observations: 50
#> Number of Categories: 4

Predicting for v1 = 0.25, v2 = 0.5, v3 = 0.5, v4 = 0.1

model.prediction <-
  predict(model.inla,
          data.pred = data.frame(v1 = 0.25,
                                 v2 = 0.5,
                                 v3 = 0.5,
                                 v4 = 0.1))
#> 
#>  
#>  ---------------------- Predicting ----------------- 
#>  
#> 
model.prediction$summary_predictive_means
#> $y1
#>            Min.    1st Qu.    Median     Mean   3rd Qu.     Max.
#> [1,] 0.02366363 0.08237956 0.1045064 0.110114 0.1321715 0.306817
#> 
#> $y2
#>             Min.    1st Qu.     Median       Mean    3rd Qu.      Max.
#> [1,] 0.005978446 0.02318953 0.03082928 0.03341511 0.04057716 0.1574298
#> 
#> $y3
#>           Min.   1st Qu.    Median      Mean   3rd Qu.    Max.
#> [1,] 0.2386448 0.4920715 0.5580837 0.5553907 0.6213676 0.86177
#> 
#> $y4
#>            Min.   1st Qu.    Median      Mean   3rd Qu.     Max.
#> [1,] 0.08145196 0.2404386 0.2950362 0.3010802 0.3541525 0.626968
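
As a quick sanity check, these posterior predictive means can be compared with the proportions implied by the true coefficients used in the simulation. The sketch below plugs v1 = 0.25, v2 = 0.5, v3 = 0.5, v4 = 0.1 into the linear predictors defined by x above:

# Linear predictor for each category at the new covariate values
eta_new <- c(-1.5 + 1   * 0.25,  # category 1: intercept -1.5, slope 1
             -3   + 1.5 * 0.5,   # category 2: intercept -3,   slope 1.5
              2   - 3   * 0.5,   # category 3: intercept  2,   slope -3
             -1   + 5   * 0.1)   # category 4: intercept -1,   slope 5
# Expected proportions E[y] = alpha / sum(alpha) under the true model
round(exp(eta_new) / sum(exp(eta_new)), 3)
#> [1] 0.108 0.040 0.623 0.229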