cFDR


I derive a cFDR estimator for a continuous auxillary covariate (q):

\[\begin{equation} \begin{split} \widehat{cFDR}(p,q) &= P(H_0^p\text{ | }P\leq p, Q\leq q)\\[2ex] &= \dfrac{P(P\leq p\text{ | }H_0^p, Q\leq q) P(H_0^p\text{ | }Q\leq q)}{P(P\leq p \text{ | }Q\leq q)}\\[2ex] &= \dfrac{P(P\leq p\text{ | }H_0^p)}{\dfrac{P(P\leq p, Q\leq q)}{P(Q\leq q)}} P(H_0^p\text{ | }Q\leq q)\\[2ex] &= \dfrac{p P(Q\leq q)}{P(P\leq p, Q\leq q)} P(H_0^p\text{ | }Q\leq q)\\[2ex] &= \dfrac{p \int_{-\infty}^q \int_0^{\infty} f(x,y)dxdy}{\int_{-\infty}^q \int_{z_p}^{\infty} f(x,y)dxdy} P(H_0^p\text{ | }Q\leq q) \end{split} \end{equation}\]

So that,

\[\begin{equation} \widehat{cFDR}_{unadj}(p,q) = \dfrac{p \int_{-\infty}^q \int_0^{\infty} f(x,y)dxdy}{\int_{-\infty}^q \int_{z_p}^{\infty} f(x,y)dxdy} \end{equation}\]

and

\[\begin{equation} \widehat{cFDR}_{adj}(p,q) = \dfrac{p \int_{-\infty}^q \int_0^{\infty} f(x,y)dxdy}{\int_{-\infty}^q \int_{z_p}^{\infty} f(x,y)dxdy} \widehat{P(H_0^p\text{ | }Q\leq q)} \end{equation}\]

where \(\widehat{P(H_0^p\text{ | }Q\leq q)}=\dfrac{P(Q\leq q|p>1/2)}{P(Q\leq q)}\).


pq_full <- readRDS("realdata.RDS")

par(mfrow = c(1,2))
hist(pq_full$p, breaks = 100, main = "Histogram of P")
hist(pq_full$q, breaks = 100, main = "Histogram of Q")

chr1 = which(pq_full$chr == 1)
candidate_indices = which(pq_full$p < 0.0005 | pq_full$q > 1)

indices = intersect(chr1, candidate_indices)

fold = chr1 # leave out these indices

mode = 2 # leave one-chromosome-out

adj = TRUE # new method does adjustment separately

nt = 5000 # resolution of x
nv = 1000 # resolution of L curves (q)

res_p = 500 # resolution for KDE estimates
res_q = 200

p <- pq_full$p
q <- pq_full$q

gx <- 10^-30

zp <- -qnorm(pq_full$p/2)

maxx = max(c(10, zp))

# zp[which(zp > maxx)] = maxx

maxy = max(q)
miny = min(q)

if (mode==2) sub = setdiff(1:length(zp), fold) else sub = 1:length(zp)

# define limits of q by adding 20% either side
q_20 <- (max(q[sub]) - min(q[sub])) * 0.2

# define limits of p similar to james
lims=c(0, max(12, maxx), miny-q_20, maxy+q_20) # c(xl, xu, yl, yu)

kpq = kde2d(zp[sub], q[sub], n=c(res_p, res_q), lims=lims) 

df <- data.frame(ZP = rep(kpq$x, times = res_q), Q = rep(kpq$y, each = res_p), z = as.vector(kpq$z))

ggplot(data = df, aes(x = ZP, y = z, group = Q, col = Q)) + geom_line(cex = 0.2) + theme_cowplot(12) + background_grid(major = "xy", minor = "none") + ylab("kpq$z") + ggtitle('KDE of P,Q')

# need Fdr rather than fdr to sum to get P(P\leq p, Q\leq q)
# think carefully about this bit because James' code looks for ZQ>zq (we need Q<q)
# int2_kpq=t(apply(apply(kpq$z[res:1,res:1],2,cumsum),1,cumsum))[res:1,res:1]

# int2_kpq estimates P(ZP\geq zp, Q\leq q)
int2_kpq = t(apply(apply(kpq$z[res_p:1,],2,cumsum),1,cumsum))[res_p:1,]

df <- data.frame(ZP = rep(kpq$x, times = res_q), Q = rep(kpq$y, each = res_p), z = as.vector(int2_kpq))

ggplot(data = df, aes(x = ZP, y = z, group = Q, col = Q)) + geom_line(cex = 0.2) + theme_cowplot(12) + background_grid(major = "xy", minor = "none") + ylab("kpq$z") + ggtitle('"Integration" (summing) for P(ZP>=zp, Q<= q)')