Contents

  1. Altering W (simulation results)
  2. Investigating the role of W
  3. Choosing mu
  4. Results
  5. Iteratively correcting
  6. Paper plan
  7. BSU Together 5 min presentation plan
  8. Questions
  9. Next steps

Altering W



Investigating the role of W


apply(data, 2, max) # column-wise maxima: log BFs for each value of W, plus the z-scores z0
##   bf_0.05    bf_0.1    bf_0.2    bf_0.3    bf_0.4    bf_0.5        z0 
##  9.439236 10.183908  9.910496  9.585876  9.326729  9.116836  4.957118
apply(data, 2, min) # column-wise minima
##   bf_0.05    bf_0.1    bf_0.2    bf_0.3    bf_0.4    bf_0.5        z0 
## -1.269698 -1.932341 -2.617562 -3.021545 -3.308708 -3.531611 -2.843589
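
For reference, W enters the log approximate Bayes factor only through the shrinkage ratio r = W/(W + V), as in Wakefield's approximation used by coloc. A minimal sketch (z and V are assumed to be the marginal z-scores and the variances of the estimated effect sizes):

labf <- function(z, V, W = 0.2){
  r <- W / (W + V) # the prior variance's share of the total variance
  0.5 * (log(1 - r) + r*z^2) # Wakefield log approximate Bayes factor
}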



Choosing mu


1. corr.muhat.gam

Read off \(\mu\) such that E(abs(Z)) = sum(abs(z0)*pp0), using the method we discussed last week.

mu.est <- function(X){
  y <- seq(0, 20, 0.005) # grid of candidate values of mu
  x <- sapply(y, function(m) mean(abs(rnorm(50000, mean = m)))) # Monte Carlo estimate of E|Z| for Z ~ N(m, 1)
  approx(x, y, xout = X)$y # invert: the mu whose E|Z| equals X
}

z0.abs.pp0 <- sum(abs(z0)*pp0) # pp0 is pps from z0 (sum to 1)
muhat.gam <- mu.est(z0.abs.pp0)
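
As an aside, the simulation-based lookup can be cross-checked against the closed form E(abs(Z)) = mu*(2*pnorm(mu) - 1) + 2*dnorm(mu) for Z ~ N(mu, 1). A sketch of the same inversion using it (not part of the pipeline):

e.absz <- function(mu) mu*(2*pnorm(mu) - 1) + 2*dnorm(mu) # exact E|Z| for Z ~ N(mu, 1) (folded normal mean)

mu.est.exact <- function(X){
  y <- seq(0, 20, 0.005) # same grid as mu.est
  approx(e.absz(y), y, xout = X)$y # invert the exact curve rather than the Monte Carlo one
}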


2. corr.muhat.ave

\(\mu\) is the average of z0.abs.pp0dash = sum(abs(z0)*pp0dash) and ph0.maxabsz0 = (1-ph0)*maxz0, where maxz0 is the maximum absolute z-score.

ph0.func <- function(z0, w = 0.2){
  pvals <- pnorm(abs(z0), lower.tail = FALSE)*2 # convert z-scores to two-sided p-values
  tmp <- approx.bf.p(p = pvals, # approximate log Bayes factors
                     f = maf, type = "cc", N = 2*NN, s = 0.5, W = w)[,"lABF"]
  p1 <- 1e-04 # prior probability that a SNP is causal (hard coded - bad code)
  nsnps <- length(tmp)
  prior <- c(1 - nsnps*p1, rep(p1, nsnps)) # prior weights: null model first, then one per SNP
  tmp <- c(0, tmp) # add the null model, whose log Bayes factor is 0 (BF = 1)
  my.denom <- coloc:::logsum(tmp + log(prior)) # log of the summed prior-weighted BFs
  tmp1 <- exp(tmp + log(prior) - my.denom)
  posthi <- tmp1/sum(tmp1) # posterior probabilities: null first, then each SNP causal
  posthi
}
ph0.tmp <- ph0.func(z0)
ph0 <- ph0.tmp[1] # posterior probability of the null model (no causal variant)
pp0dash <- ph0.tmp[-1] # per-SNP posterior probabilities allowing for the null (sum to 1 - ph0)

z0.abs.pp0dash <- sum(abs(z0)*pp0dash) # posterior-weighted sum of absolute z-scores (null-adjusted weights)
ph0.maxabsz0 <- (1-ph0)*maxz0 # maximum absolute z-score, down-weighted by the probability that a causal variant exists
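
A quick sanity check of the split above (assuming ph0.func returns normalised posteriors with the null model first):

stopifnot(abs(ph0 + sum(pp0dash) - 1) < 1e-8) # posteriors over {null, SNP 1, ..., SNP p} sum to 1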

3. corr.z0.abs.pp0dash

\(\mu\) is simply z0.abs.pp0dash as above.


4. corr.muhat.ave.2

\(\mu\) is the average of muhat.gam (method 1) and ph0.maxabsz0 = (1-ph0)*maxz0.
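
Collecting the four candidate values of \(\mu\) in one place (a sketch; the mu.* names are mine, and maxz0 is assumed to be max(abs(z0))):

maxz0 <- max(abs(z0)) # maximum absolute z-score in the region (assumed definition)

mu.corr.muhat.gam      <- muhat.gam                            # 1. inverted E|Z| lookup
mu.corr.muhat.ave      <- (z0.abs.pp0dash + (1 - ph0)*maxz0)/2 # 2. average of the two null-adjusted summaries
mu.corr.z0.abs.pp0dash <- z0.abs.pp0dash                       # 3. null-adjusted posterior-weighted |z| alone
mu.corr.muhat.ave.2    <- (muhat.gam + (1 - ph0)*maxz0)/2      # 4. average of (1) and the down-weighted maximum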


Results



Iteratively correcting


testing <- function(corr.cov, Sigma, thr, V, pp, acc = min((1 - thr)/2, 0.03)){
  
  original.cov <- corr.cov
  is_within_tolerance <- FALSE
  thr1 <- thr
  
  while (!is_within_tolerance) {
    if(dplyr::between(corr.cov, thr, thr+acc)){
      is_within_tolerance <- TRUE
    }
    
    # if corr.cov is too high
    if(corr.cov > thr+acc){ # then run again with lower threshold (remove variants)
      thr1 <- thr - (corr.cov-thr)/2
      corr.cov <- corrected_cov(mu = muhat.gam, V = V, Sigma = Sigma, pp0 = pp, thresh = thr1)
      next
    }
    
    # if corr.cov is too low
    if(corr.cov < thr){ # then run again with higher threshold (add variants)
      thr1 <- thr + (thr-corr.cov)/2
      corr.cov <- corrected_cov(mu = muhat.gam, V = V, Sigma = Sigma, pp0 = pp, thresh = thr1)
      next
    }
  } 
  
  o <- order(pp, decreasing = TRUE)  # order index for true pp
  cumpp <- cumsum(pp[o])  # cumulative sums of ordered pps
  wh <- which(cumpp > thr1)[1]  # how many variants needed to exceed the adjusted threshold thr1
  wh.original <- which(cumpp > thr)[1]  # how many needed to exceed the original threshold
  size <- cumpp[wh]
  size.original <- cumpp[wh.original]
  data.frame("original.thr" = thr, "original.size" = size.original, "original.nvar" = wh.original,
             "original.cov" = original.cov, "required.thr" = thr1, "new.size" = size,
             "new.nvar" = wh, "new.cov" = corr.cov)
}
##     original.thr original.size original.nvar original.cov required.thr
##  1:         0.60     0.6107794            35    0.7007114    0.5496443
##  2:         0.60     0.6069704             7    0.6175395    0.6000000
##  3:         0.60     0.6115291            11    0.6548572    0.5725714
##  4:         0.80     0.8040384             7    0.8193941    0.8000000
##  5:         0.80     0.8026574            80    0.9246079    0.7376960
##  6:         0.80     0.8248154            12    0.8150597    0.8000000
##  7:         0.95     0.9702981             6    0.9710041    0.9500000
##  8:         0.95     0.9695234             9    0.9680762    0.9500000
##  9:         0.80     0.8011984           103    0.9233445    0.8516255
## 10:         0.90     0.9217806            10    0.9200190    0.9000000
##      new.size new.nvar   new.cov
##  1: 0.5592029       31 0.6068894
##  2: 0.6069704        7 0.6175395
##  3: 0.6115291       11 0.6294252
##  4: 0.8040384        7 0.8193941
##  5: 0.7410350       61 0.8019581
##  6: 0.8248154       12 0.8150597
##  7: 0.9702981        6 0.9710041
##  8: 0.9695234        9 0.9680762
##  9: 0.8525967      122 0.8253885
## 10: 0.9217806       10 0.9200190
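Each row of the table above corresponds to one call of roughly this form (a sketch; corr.cov, Sigma, V and pp for a single simulated region are assumptions, and corrected_cov and muhat.gam are taken from the calling environment):

testing(corr.cov = corr.cov, Sigma = Sigma, thr = 0.8, V = V, pp = pp)
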
iter.correct <- function(z, maf, N0, N1, Sigma, acc = min((1 - thr)/2, 0.03), thr){
  
  varbeta <- Var.data.cc(maf, N = N0+N1, s = N1/(N0+N1)) # variance of the estimated effect sizes
  pp0 <- ppfunc(z = z, V = varbeta, W = 0.2) # posterior probabilities of causality for each SNP
  muhat.gam <- mu.est(sum(abs(z) * pp0)) # estimate mu by inverting the E|Z| lookup (method 1 above)
  corr.cov <- corrected_cov(mu = muhat.gam, V = varbeta, Sigma = Sigma, pp0 = pp0, thresh = thr)
  
  original.cov <- corr.cov # original corr.cov using the specified threshold
  is_within_tolerance <- FALSE
  thr1 <- thr
  
  while (!is_within_tolerance) {
    if(dplyr::between(corr.cov, thr, thr+acc)){
      is_within_tolerance <- TRUE
    }
    
    # if corr.cov is too high (above thr+acc)
    if(corr.cov > thr+acc){ # then run again with lower threshold (remove variants)
      thr1 <- thr - (corr.cov-thr)/2
      corr.cov <- corrected_cov(mu = muhat.gam, V = varbeta, Sigma = Sigma, pp0 = pp0, thresh = thr1)
      next
    }
    
    # if corr.cov is too low (below thr)
    if(corr.cov < thr){ # then run again with higher threshold (add variants)
      thr1 <- thr + (thr-corr.cov)/2
      corr.cov <- corrected_cov(mu = muhat.gam, V = varbeta, Sigma = Sigma, pp0 = pp0, thresh = thr1)
      next
    }
    
  } 
  
  o <- order(pp0, decreasing = TRUE)  # order index for true pp
  cumpp <- cumsum(pp0[o])  # cumulative sums of ordered pps
  wh <- which(cumpp > thr1)[1]  # how many variants needed to exceed the adjusted threshold thr1
  size <- cumpp[wh]
  return(list(credset=o[1:wh],
              coverage=corr.cov))
}
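
A minimal usage sketch (the inputs z, maf, N0, N1 and the LD matrix Sigma for a single region are assumptions):

res <- iter.correct(z = z, maf = maf, N0 = N0, N1 = N1, Sigma = Sigma, thr = 0.95)
res$credset # indices of the variants in the corrected credible set
res$coverage # corrected coverage of that credible set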

Paper plan


0. Abstract

1. Introduction

  • What is a GWAS
  • What is fine-mapping and why it is important
  • The Bayesian method of fine-mapping

2. The Problem

  • Over- and under-coverage; include the plots from above

3. Our fix

  • Introduce the package and how we have fixed the problem
  • Include the methods here or separately?

4. Application to real data

5. Discussion


BSU Together 5 min presentation plan

Slide 1. GWAS -> fine-mapping

Slide 2. Bayesian approach / assumptions / example

Slide 3. The problem (plots showing systematic bias in claimed coverage - line or faceted plots?)

Slides 4 + 5. Our fix: estimate the true effect, simulate more credible sets for each variant considered causal, build one GAM per variant considered causal (logit(cov) ~ logit(size)), use these to predict the probability that the original credible set covers the causal variant, and weight the predictions by the posterior probability that that SNP is causal (see the sketch after this list). Include a plot of faceted claimed vs corrected coverage.

Slide 6. Conclusions and future directions: try the method with more than one causal SNP, apply it to the SuSiE model (we have shown problems with its coverage), apply it to CHi-C methods.
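
One possible reading of the GAM step on slides 4 + 5, as a hedged sketch (the data frame sims, its columns covered and logit_size, and the claimed size size0 of the original credible set are illustrative names, not names from the corrcoverage code):

library(mgcv)

# one GAM per variant considered causal: P(covered) as a smooth function of the
# logit of the claimed credible set size, fitted to simulated credible sets
fit <- gam(covered ~ s(logit_size), data = sims, family = binomial)

# predicted probability that the original credible set (claimed size size0) covers
# the causal variant; these predictions are then weighted by the posterior
# probability that this variant is the causal one
predict(fit, newdata = data.frame(logit_size = qlogis(size0)), type = "response")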


Questions

  1. Why did we move away from random forests to GAMs?
  2. Possible improvements: increase the number of simulated credible sets for each SNP considered causal from 5000 to 10000 (in the zj_pp function)? Need to weigh up the costs and benefits.
  3. What should I focus on for the 5 min BSU Together talk (it will probably take me 5 minutes just to explain our correction)?
  4. Is my iterative correction method OK?
  5. Which final corrected coverage method should we use? I think the light blue one (corr.muhat.ave), but note that it is only as good as the maxz method for maxz > 4 (see plot); it is, however, better in lower maximum \(Z\)-score scenarios. Once we have chosen a final corrected coverage method, I will update the corrcoverage package to use it.


Next steps


  • Update corrcoverage package with chosen correction method.
  • Finalise iterative correction approach.