### R script for Introduction to Bayesian Analysis with R-INLA
### Day 2, ESAC Workshop 2017, Madrid
###
### Instructions
### (1) Step through the completed code with default parameterisations;
### questions in comments are there to check that *you* understand how 
### the models are structured
### 
### Further info: Dr Ewan Cameron <dr.ewan.cameron@gmail.com>

library(INLA) # install.packages("INLA", repos="https://inla.r-inla-download.org/R/stable", dep=TRUE)
library(rgeos) # install.packages("rgeos")
library(quantreg) # install.packages(c("rgeos","quantreg"))

## Extract a nice clean sample of galaxies to play with from Nair & Abraham (2010)

nair.data <- read.csv("NairAbrahamMorphology.cat",sep="\t",head=1)

valid <- which(nair.data$z > 0.025 & nair.data$z < 0.035 &  nair.data$Mass < 999999 & nair.data$Mass > 9 & nair.data$SFRM > -15 & nair.data$V_disp > 20 & nair.data$RA > 150 & nair.data$RA < 225 & nair.data$DEC > 40 & nair.data$DEC < 60 & nair.data$TType < 99)

mass <- nair.data$Mass[valid]
mass_err <- nair.data$Mass[valid]
sfrm <- nair.data$SFRM[valid]
barred <- matrix(as.integer(intToBits(nair.data$Bar[valid])),ncol=length(valid))
barred <- as.integer(barred[2,]>0 | barred[3,]>0 | barred[4,]>0)
vdisp <- nair.data$V_disp[valid]
vdisperr <- nair.data$V_disp_Err[valid]
ra <- nair.data$RA[valid]
dec <- nair.data$DEC[valid]
type <- nair.data$TType[valid]

## A logistic regression model for the bar fraction: w/ linear predictors only

# Simplest possible exploratory analysis: just bin the data and plot!
plot(seq(9.125,11.875,by=0.25),hist(mass[barred==1],breaks=seq(9,12,by=0.25),plot=F)$counts/(hist(mass[barred==1],breaks=seq(9,12,by=0.25),plot=F)$counts+hist(mass[barred==0],breaks=seq(9,12,by=0.25),plot=F)$counts),xlab="Mass",ylab="Bar Fraction")

plot(seq(-13.25,-8.25,by=0.5),hist(sfrm[barred==1],breaks=seq(-13.5,-8,by=0.5),plot=F)$counts/(hist(sfrm[barred==1],breaks=seq(-13.5,-8,by=0.5),plot=F)$counts+hist(sfrm[barred==0],breaks=seq(-13.5,-8,by=0.5),plot=F)$counts),xlab="SFRM",ylab="Bar Fraction")

# Or make a scatter plot with barred galaxies highlighted
plot(mass,sfrm)
points(mass[barred==1],sfrm[barred==1],col="red",pch=19,cex=0.5)

# The simplest possible INLA model: a logistic regression with no random effects
data <- data.frame(mass,sfrm,barred)
formula <- barred ~ mass + sfrm # this is a standard regression model notation in R: e.g. R's default 'glm' function: the presence of a constant intercept is assumed unless explicitly removed by adding a -1 as barred ~ -1 + mass + sfrm
result <- inla(formula,family="binomial",data=data,control.compute= list(dic=TRUE, cpo = TRUE))
# family: tells INLA how the data are connected to the predictor variables (the logistic transform is assumed for binomial data) 
# control.compute: options in the class of control.XXX are input as lists and allow for additional INLA features: here we ask to compute also the DIC criterion and PIT statistics
# for more details: help(inla), help(control.compute)

summary(result)

ilogit <- function(x) {1/(1+exp(-x))}
plot(mass,sfrm,col=hsv(ilogit(result$summary.linear.predictor[,1]))) # the output of an INLA run is contained in lists (i.e, access with $name, use names(result), names(result$summary.fixed), etc.)
hist(result$cpo$pit) # why does the PIT look so bad? ( http://journals.plos.org/plosone/article?id=10.1371/journal.pone.0181790 )

## Binomial regression for bar fraction: w/ linear predictors plus GP term

mesh <- inla.mesh.2d(loc.domain=cbind(mass,sfrm),max.edge=c(0.5,5))
plot(mesh)

spde <- inla.spde2.pcmatern(mesh,prior.range=c(0.5,0.1),prior.sigma=c(1,0.1))
A <- inla.spde.make.A(mesh,cbind(mass,sfrm))

stk <- inla.stack(data=list(barred=barred),A=list(A,1,1),effects=list(s=1:spde$n.spde,mass=mass,sfrm=sfrm),tag='est')
formula <- barred ~ mass + sfrm + f(s,model=spde)
result2 <- inla(formula,family="binomial",data=inla.stack.data(stk),control.compute= list(dic=TRUE,cpo=TRUE),control.predictor=list(A=inla.stack.A(stk)))

summary(result2)
plot(mass,sfrm,col=hsv(ilogit(result2$summary.linear.predictor[,1])))
rect(10,-11.3,11.2,-10) # this looks interesting; hypothesis?

# better check whether this model actually performed better: start with the DIC
cat("DIC Model 1 = ",result$dic$dic,"\n")
cat("DIC Model 2 = ",result2$dic$dic,"\n")
# which is preferred? what else might we do next?

## Simple shrinkage on velocity dispersion

data <- data.frame(vdisp,vdisperr,id=1:length(vdisp)) 
formula <- vdisp ~ 1 + f(id,model="iid") # represent the underlying distribution for the true velocity dispersion of each galaxy as an intercept for the mean velocity dispersion plus an iid draw from a zero mean normal of unknown (to be fitted) width 
result <- inla(formula,data=data,family="gaussian",control.family = list(hyper = list(prec = list(prior = "loggamma",param = c(10000,10000), initial = 1))),scale=(1/vdisperr^2),control.predictor=list(compute=TRUE),control.compute= list(dic=TRUE,cpo=TRUE)) # the observational errors here are introduced by specifying control family to be "gaussian" with the scale (here parameterised as inverse variance; see help(inla)) set by the 'known' observational errors
# the default inla model assumes that a rescaling of our 'known' observational errors is allowed but for now e want to use exactly the ones we're given so we set the prior to concentrate on a scaling of one param=c(10000,10000): this is a gamma(shape=10000,rate=10000) prior

layout(matrix(c(1,2,3,4),nrow=2)) # may need to resize your plotting window to see all four plots
hist(vdisp,breaks=30)
lines(seq(1,350),dnorm(seq(1,350),result$summary.fixed$mean,1/sqrt(result$summary.hyperpar$mean[2]))*15000,col="red")
plot(vdisperr,result$summary.fitted.values$sd,xlim=c(0,50),ylim=c(0,50))
lines(c(0,50),c(0,50),col="red")
plot(vdisp,result$summary.fitted.values$mean-vdisp)
lines(c(0,500),c(0,0),col="red")
plot(vdisp,vdisperr)
# do these diagnostics of the shrinkage make sense to you? is the normal distribution seem to be doing a good job here given the available data and their errors?
layout(1)
hist(result$cpo$pit) # try the PIT; what does this suggest? how might we try to improve?

## Local density estimation: there are a number of implementations that can lead to essentially the same model structure when using INLA for LGCP problems: this version is based on the guide by Elias Krainski

layout(1)
plot(ra,dec)
n <- length(ra)

loc.d <- matrix(NA,nrow=5,ncol=2) # boundary of our sample selection: this is important to specify since the likelihood of the Poisson process is defined over a specific area, not just at the locations of our data points
loc.d[1,] <- c(150,40)
loc.d[2,] <- c(225,40)
loc.d[3,] <- c(225,60)
loc.d[4,] <- c(150,60)
loc.d[5,] <- c(150,40)

mesh <- inla.mesh.2d(loc.domain=loc.d,max.edge=c(2,5),offset=0)
plot(mesh)
nv <- mesh$n # number of nodes in our mesh

inla.mesh.dual <- function(mesh) { # this is an auxiliary function from 'INLA tools' designed to help estimate the area of each Voronoi polygon
    if (mesh$manifold=='R2') {
        ce <- t(sapply(1:nrow(mesh$graph$tv), function(i)
            colMeans(mesh$loc[mesh$graph$tv[i, ], 1:2])))
        library(parallel)
        pls <- mclapply(1:mesh$n, function(i) {
            p <- unique(Reduce('rbind', lapply(1:3, function(k) {
                j <- which(mesh$graph$tv[,k]==i)
                if (length(j)>0) 
                    return(rbind(ce[j, , drop=FALSE],
                                 cbind(mesh$loc[mesh$graph$tv[j, k], 1] +
                                       mesh$loc[mesh$graph$tv[j, c(2:3,1)[k]], 1], 
                                       mesh$loc[mesh$graph$tv[j, k], 2] +
                                       mesh$loc[mesh$graph$tv[j, c(2:3,1)[k]], 2])/2))
                else return(ce[j, , drop=FALSE])
            })))
            j1 <- which(mesh$segm$bnd$idx[,1]==i)
            j2 <- which(mesh$segm$bnd$idx[,2]==i)
            if ((length(j1)>0) | (length(j2)>0)) {
                p <- unique(rbind(mesh$loc[i, 1:2], p,
                                  mesh$loc[mesh$segm$bnd$idx[j1, 1], 1:2]/2 +
                                  mesh$loc[mesh$segm$bnd$idx[j1, 2], 1:2]/2, 
                                  mesh$loc[mesh$segm$bnd$idx[j2, 1], 1:2]/2 +
                                  mesh$loc[mesh$segm$bnd$idx[j2, 2], 1:2]/2))
                yy <- p[,2]-mean(p[,2])/2-mesh$loc[i, 2]/2
                xx <- p[,1]-mean(p[,1])/2-mesh$loc[i, 1]/2
            }
            else {
                yy <- p[,2]-mesh$loc[i, 2]
                xx <- p[,1]-mesh$loc[i, 1]
            }
            Polygon(p[order(atan2(yy,xx)), ])
        })
        return(SpatialPolygons(lapply(1:mesh$n, function(i)
            Polygons(list(pls[[i]]), i))))
    }
    else stop("It only works for R2!")
}

dmesh <- inla.mesh.dual(mesh) # determine a weight for each Voronoi cell based on its area
domainSP <- SpatialPolygons(list(Polygons(list(Polygon(loc.d)), '0')))
sum(w <- sapply(1:length(dmesh), function(i) { 
if (gIntersects(dmesh[i,], domainSP))
return(gArea(gIntersection(dmesh[i,], domainSP)))
else return(0)
}))

spde <- inla.spde2.pcmatern(mesh,prior.range=c(0.5,0.1),prior.sigma=c(1,0.1)) # define the spde component
A <- inla.spde.make.A(mesh,cbind(ra,dec)) # project spde to observed data locations

y.pp <- rep(0:1, c(nv, n)) # advanced INLA data/formula wrangling to properly constrain field over mesh locations: inside and outside of observing window: further details in Krainski's guide ( http://www.math.ntnu.no/inla/r-inla.org/tutorials/spde/spde-tutorial.pdf )
e.pp <- c(w, rep(0, n))
imat <- Diagonal(nv, rep(1, nv))
A.pp <- rBind(imat, A)
stk.pp <- inla.stack(data=list(y=y.pp, e=e.pp),A=list(1,A.pp), tag='pp', effects=list(list(b0=rep(1,nv+n)), list(i=1:nv)))

pp.res <- inla(y ~ 0 + b0 + f(i, model=spde), family='poisson', data=inla.stack.data(stk.pp), control.predictor=list(A=inla.stack.A(stk.pp)), E=inla.stack.data(stk.pp)$e) # our likelihood for this model is Poisson, the link function is automatically assumed to be the natural logaritm

# Make pretty plot of fitted density
mesh.projector <- inla.mesh.projector(mesh, xlim=c(150,225), ylim=c(40,60), dims=c(200,200))
im <- matrix(as.numeric(mesh.projector$proj$A%*%pp.res$summary.random$i$mean),nrow=200)
image(seq(150,225,length.out=200),seq(40,60,length.out=200),im,xlab="RA",ylab="DEC",col=rev(rainbow(50)))
points(ra,dec)

# Does the 'local density' correlate with any galaxy properties?
local.density <- as.numeric(A%*%pp.res$summary.random$i$mean)
plot(local.density,vdisp)
upperquantilefit <- rq(vdisp ~ local.density,0.9)
lines(sort(local.density),upperquantilefit$fitted.values[sort.list(local.density)],col="red")
summary(upperquantilefit,se="boot") # urgh p~0.005! doesn't look super convincing: maybe we'd better look at a large sample size (more redshift slices and other regions of the sky