### R script for Pseudo-marginal MCMC Exercise
### Day 2, ESAC Workshop 2017, Madrid
###
### Instructions
### (1) Step through the completed code for the tractable case comparing
###     the pseudo-marginal implementation (also called Bayesian Synthetic
###     Likelihood) with the conventional MCMC solution and the proposal by
###     Sellentin & Heavens (2015) (arXiv: 1511.05969)
### (2) Try adapting the given pseudo-marginal code to fit data from a genuinely
###     intractable likelihood function based on the Lotka-Volterra
###     predator-prey model
###
### Further info: Dr Ewan Cameron

library(MASS)        # default installed R library: mvrnorm()
library(matrixcalc)  # install.packages(c("matrixcalc","ellipse","primer"))
library(ellipse)
library(primer)

# In this example our data are known (chosen) to be drawn from a bivariate
# Normal with the following mean and covariance
true.cov.matrix <- matrix(c(5, 3, 3, 5), nrow = 2, ncol = 2)
true.mean <- c(2, -1)

# Our theta will have two parameters which (we know here but usually would not)
# control directly (and only) the mean of the sampling distribution.
# Our prior for theta will be a diffuse (zero-centred) Normal.
prior.mean <- c(0, 0)
prior.cov.matrix <- diag(2) * 10

# Bivariate Normal prior density evaluated at 'proposed.mean'.
# Generalised to centre on 'prior.mean' (the original silently assumed a
# zero-centred prior); behaviour is unchanged while prior.mean = c(0, 0).
prior.density <- function(proposed.mean) {
  dev <- proposed.mean - prior.mean
  as.numeric(
    1 / (2 * pi) / sqrt(det(prior.cov.matrix)) *
      exp(-dev %*% solve(prior.cov.matrix) %*% dev / 2)
  )
}

# Draw our 'observed' dataset
obs.data <- mvrnorm(1, true.mean, true.cov.matrix)

# We know the ground truth for this simple example as the product of two
# Normal densities: e.g. http://www.tina-vision.net/docs/memos/2003-003.pdf
true.posterior.cov.matrix <- solve(solve(true.cov.matrix) + solve(prior.cov.matrix))
true.posterior.mean <- true.posterior.cov.matrix %*%
  (solve(prior.cov.matrix) %*% prior.mean + solve(true.cov.matrix) %*% obs.data)

### Run the Bayesian synthetic likelihood algorithm with 6 draws from the
### data generating process at each proposal
sample.data.generating.process <- function(proposed.mean) {
  mvrnorm(6, proposed.mean, true.cov.matrix)
}
d <- 2  # dimension of the data
n <- 6  # simulated draws per proposal

# Unbiased estimator of the Normal density (Ghurye & Olkin form), using the
# formulae at the end of Section 2.1 of Price et al. (2017). Constant factors
# that do not depend on the draws cancel in the MH ratio and are dropped.
estimate.likelihood <- function(obs.data, draws) {
  resid <- obs.data - colMeans(draws)
  X <- (n - 1) * cov(draws) - resid %*% t(resid) / (1 - 1 / n)
  if (is.positive.definite(X, tol = 1e-8)) {
    # BUGFIX: the estimator also carries a det((n-1)*cov(draws))^(-(n-d-2)/2)
    # factor; it depends on the simulated draws, so it cannot be omitted from
    # a pseudo-marginal acceptance ratio (the original dropped it).
    output <- det((n - 1) * cov(draws))^(-(n - d - 2) / 2) *
      det(X)^((n - d - 3) / 2)
  } else {
    output <- 0
  }
  return(output)
}

# Initialise the chain at the observed data; redraw until the likelihood
# estimate is non-zero (i.e. X came out positive-definite)
current.mean <- obs.data
current.log.likelihood.est <- -Inf
current.log.prior.density <- log(prior.density(current.mean))
while (current.log.likelihood.est == -Inf & current.log.prior.density != -Inf) {
  current.draws <- sample.data.generating.process(current.mean)
  current.log.likelihood.est <- log(estimate.likelihood(obs.data, current.draws))
}

# Burn in
for (i in 1:1000) {
  # Simple random walk MCMC proposal
  proposed.mean <- current.mean + rnorm(2, 0, 0.25)
  proposed.draws <- sample.data.generating.process(proposed.mean)
  proposed.log.likelihood.est <- log(estimate.likelihood(obs.data, proposed.draws))
  proposed.log.prior.density <- log(prior.density(proposed.mean))
  if (proposed.log.likelihood.est != -Inf & proposed.log.prior.density != -Inf) {
    # does this accept-reject step make sense(?)
    if (log(runif(1)) < proposed.log.likelihood.est - current.log.likelihood.est +
        proposed.log.prior.density - current.log.prior.density) {
      current.mean <- proposed.mean
      current.log.likelihood.est <- proposed.log.likelihood.est
      current.log.prior.density <- proposed.log.prior.density
    }
  }
}

# Posterior samples
bsl.posterior.draws <- list()
for (i in 1:100000) {
  proposed.mean <- current.mean + rnorm(2, 0, 0.25)
  proposed.draws <- sample.data.generating.process(proposed.mean)
  proposed.log.likelihood.est <- log(estimate.likelihood(obs.data, proposed.draws))
  proposed.log.prior.density <- log(prior.density(proposed.mean))
  if (proposed.log.likelihood.est != -Inf & proposed.log.prior.density != -Inf) {
    if (log(runif(1)) < proposed.log.likelihood.est - current.log.likelihood.est +
        proposed.log.prior.density - current.log.prior.density) {
      current.mean <- proposed.mean
      current.log.likelihood.est <- proposed.log.likelihood.est
      current.log.prior.density <- proposed.log.prior.density
    }
  }
  bsl.posterior.draws[[i]] <- current.mean
  if ((i %% 10000) == 0) {
    cat(i / 100000 * 100, "% complete\n")
  }
}
bsl.posterior.draws <- do.call(rbind, bsl.posterior.draws)

# Plot results (NOTE(review): quartz() is macOS-specific; substitute
# dev.new() or x11() on other platforms)
quartz(width = 7, height = 4)
layout(cbind(c(1, 3), c(2, 4)))
par(mai = c(0.7, 0.7, 0.1, 0.1))
plot(mvrnorm(100000, true.posterior.mean, true.posterior.cov.matrix),
     xlim = c(-9, 9), ylim = c(-9, 9),
     xlab = expression(theta[1]), ylab = expression(theta[2]),
     cex = 0.1, pch = 19, col = hsv(0, alpha = 0.05))
legend("topleft", "true posterior", cex = 0.7, bty = 'n')
points(true.posterior.mean[1], true.posterior.mean[2])
lines(ellipse(true.posterior.cov.matrix, centre = true.posterior.mean, t = 2.4))
plot(bsl.posterior.draws, xlim = c(-9, 9), ylim = c(-9, 9),
     xlab = expression(theta[1]), ylab = expression(theta[2]),
     cex = 0.1, pch = 19, col = hsv(0.4, alpha = 0.05))
legend("topleft", "pseudo-marginal w/ 6 simulated obs per step.", cex = 0.7, bty = 'n')
points(true.posterior.mean[1], true.posterior.mean[2])
lines(ellipse(true.posterior.cov.matrix, centre = true.posterior.mean, t = 2.4))
points(colMeans(bsl.posterior.draws)[1], colMeans(bsl.posterior.draws)[2],
       col = hsv(0.4, v = 0.8), pch = 21)
lines(ellipse(cov(bsl.posterior.draws), centre = colMeans(bsl.posterior.draws),
              t = 2.4), col = hsv(0.4, v = 0.8))

# Does the pseudo-marginal MCMC chain look to be producing a faithful
# approximation to the true posterior? What tests might we devise to
# characterise and test its performance?
# Pseudo-marginal MCMC samplers have been described as 'sticky'; does that
# seem like a fair description in this case?

## Run Sellentin & Heavens algorithm with 6 draws from the data generating
## process at each proposal
sample.data.generating.process <- function(proposed.mean) {
  mvrnorm(6, proposed.mean, true.cov.matrix)
}
n <- 6  # simulated draws per proposal

# Likelihood approximation via the multivariate t-distribution of
# Sellentin & Heavens (their Eqn 6); draw-independent constants are dropped.
estimate.likelihood <- function(obs.data, draws, proposed.mean) {
  resid <- obs.data - proposed.mean
  output <- det(cov(draws))^(-1 / 2) /
    (1 + resid %*% solve(cov(draws)) %*% resid / (n - 1))^(n / 2)
  return(as.numeric(output))
}

# Initialise the chain at the observed data; redraw until the likelihood
# estimate is finite
current.mean <- obs.data
current.log.likelihood.est <- -Inf
while (current.log.likelihood.est == -Inf) {
  current.draws <- sample.data.generating.process(current.mean)
  # BUGFIX: evaluate at 'current.mean' -- the original passed 'proposed.mean',
  # a stale variable left over from the final iteration of the previous
  # (BSL) sampler, so the chain was initialised with the wrong likelihood.
  current.log.likelihood.est <-
    log(estimate.likelihood(obs.data, current.draws, current.mean))
}
current.log.prior.density <- log(prior.density(current.mean))

# Burn in
for (i in 1:1000) {
  proposed.mean <- current.mean + rnorm(2, 0, 0.25)
  proposed.draws <- sample.data.generating.process(proposed.mean)
  proposed.log.likelihood.est <-
    log(estimate.likelihood(obs.data, proposed.draws, proposed.mean))
  proposed.log.prior.density <- log(prior.density(proposed.mean))
  if (proposed.log.likelihood.est != -Inf & proposed.log.prior.density != -Inf) {
    if (log(runif(1)) < proposed.log.likelihood.est - current.log.likelihood.est +
        proposed.log.prior.density - current.log.prior.density) {
      current.mean <- proposed.mean
      current.log.likelihood.est <- proposed.log.likelihood.est
      current.log.prior.density <- proposed.log.prior.density
    }
  }
}

# Posterior samples
heavens.posterior.draws <- list()
for (i in 1:100000) {
  proposed.mean <- current.mean + rnorm(2, 0, 0.25)
  proposed.draws <- sample.data.generating.process(proposed.mean)
  proposed.log.likelihood.est <-
    log(estimate.likelihood(obs.data, proposed.draws, proposed.mean))
  proposed.log.prior.density <- log(prior.density(proposed.mean))
  if (proposed.log.likelihood.est != -Inf & proposed.log.prior.density != -Inf) {
    if (log(runif(1)) < proposed.log.likelihood.est - current.log.likelihood.est +
        proposed.log.prior.density - current.log.prior.density) {
      current.mean <- proposed.mean
      current.log.likelihood.est <- proposed.log.likelihood.est
      current.log.prior.density <- proposed.log.prior.density
    }
  }
  heavens.posterior.draws[[i]] <- current.mean
  if ((i %% 10000) == 0) {
    cat(i / 100000 * 100, "% complete\n")
  }
}
heavens.posterior.draws <- do.call(rbind, heavens.posterior.draws)

# Plot results into the remaining panel of the existing layout
plot(heavens.posterior.draws, xlim = c(-9, 9), ylim = c(-9, 9),
     xlab = expression(theta[1]), ylab = expression(theta[2]),
     cex = 0.1, pch = 19, col = hsv(0.6, alpha = 0.05))
legend("topleft", "S&H t-dist w/ 6 simulated obs. per step", cex = 0.7, bty = 'n')
points(true.posterior.mean[1], true.posterior.mean[2])
lines(ellipse(true.posterior.cov.matrix, centre = true.posterior.mean, t = 2.4))
points(colMeans(heavens.posterior.draws)[1], colMeans(heavens.posterior.draws)[2],
       pch = 21, col = hsv(0.6, v = 0.8))
lines(ellipse(cov(heavens.posterior.draws),
              centre = colMeans(heavens.posterior.draws), t = 2.4),
      col = hsv(0.6, v = 0.8))

# Does the S&H algorithm appear to be converging towards the true posterior?
# How might we tweak our implementation of this algorithm to obtain a closer
# approximation?
### Time to try out the algorithm on a genuinely intractable problem

# Our black box function is the discrete Lotka-Volterra predator-prey model
# evolved under random Poisson sampling for 'n.iter' iterations, reporting as
# summary statistic the sum population of each species over the entire period.
# Inputs theta (two dimensional) control the off-diagonal entries of the
# alpha penalty matrix under a simple transformation.
#
# Generalised (backward-compatibly) to expose the iteration count and the
# initial populations as defaulted arguments; defaults reproduce the
# original behaviour exactly.
generate.mock.data <- function(theta, n.iter = 20, N0 = c(100, 100)) {
  # Clamp each component of theta to [-5, 5]
  # (vectorised form of the original four if-statements)
  theta <- pmax(pmin(theta, 5), -5)
  # Map each component through a probit-style transform onto a
  # log-uniform scale over [1e-4, 1e-2]
  theta <- exp(qunif(pnorm(theta / 3), log(0.0001), log(0.01)))
  # Interaction matrix: theta fills the off-diagonal (column-major order)
  alpha <- matrix(c(0.01, theta[1], theta[2], 0.01), nrow = 2)
  Nsum <- c(0, 0)
  N <- N0
  for (i in seq_len(n.iter)) {
    N <- rpois(2, dlvcomp2(N, alpha))
    Nsum <- Nsum + N
  }
  return(Nsum)
}

# Try adapting the pseudo-marginal MCMC example above to sample from the
# posterior formed from assuming a diffuse normal prior (e.g. mean = c(0,0),
# Sigma = 10*I) and the following data from known inputs
true.theta <- c(-1, 1)
observed.data <- generate.mock.data(true.theta)

# Does the posterior concentrate around the true theta? Should you try
# adjusting the step size of your MCMC proposal to improve mixing? Or perhaps
# changing the number of mock datasets used at each step in the
# pseudo-marginal algorithm?