### cFDR

I derive a cFDR estimator for a continuous auxillary covariate (q):

$$$\begin{split} \widehat{cFDR}(p,q) &= P(H_0^p\text{ | }P\leq p, Q\leq q)\\[2ex] &= \dfrac{P(P\leq p\text{ | }H_0^p, Q\leq q) P(H_0^p\text{ | }Q\leq q)}{P(P\leq p \text{ | }Q\leq q)}\\[2ex] &= \dfrac{P(P\leq p\text{ | }H_0^p)}{\dfrac{P(P\leq p, Q\leq q)}{P(Q\leq q)}} P(H_0^p\text{ | }Q\leq q)\\[2ex] &= \dfrac{p P(Q\leq q)}{P(P\leq p, Q\leq q)} P(H_0^p\text{ | }Q\leq q)\\[2ex] &= \dfrac{p \int_{-\infty}^q \int_0^{\infty} f(x,y)dxdy}{\int_{-\infty}^q \int_{z_p}^{\infty} f(x,y)dxdy} P(H_0^p\text{ | }Q\leq q) \end{split}$$$

So that,

$$$\widehat{cFDR}_{unadj}(p,q) = \dfrac{p \int_{-\infty}^q \int_0^{\infty} f(x,y)dxdy}{\int_{-\infty}^q \int_{z_p}^{\infty} f(x,y)dxdy}$$$

and

$$$\widehat{cFDR}_{adj}(p,q) = \dfrac{p \int_{-\infty}^q \int_0^{\infty} f(x,y)dxdy}{\int_{-\infty}^q \int_{z_p}^{\infty} f(x,y)dxdy} \widehat{P(H_0^p\text{ | }Q\leq q)}$$$

where $$\widehat{P(H_0^p\text{ | }Q\leq q)}=\dfrac{P(Q\leq q|p>1/2)}{P(Q\leq q)}$$.

pq_full <- readRDS("realdata.RDS")

par(mfrow = c(1,2))
hist(pq_full$p, breaks = 100, main = "Histogram of P") hist(pq_full$q, breaks = 100, main = "Histogram of Q")

chr1 = which(pq_full$chr == 1) candidate_indices = which(pq_full$p < 0.0005 | pq_full$q > 1) indices = intersect(chr1, candidate_indices) fold = chr1 # leave out these indices mode = 2 # leave one-chromosome-out adj = TRUE # new method does adjustment separately nt = 5000 # resolution of x nv = 1000 # resolution of L curves (q) res_p = 500 # resolution for KDE estimates res_q = 200 p <- pq_full$p
q <- pq_full$q gx <- 10^-30 zp <- -qnorm(pq_full$p/2)

maxx = max(c(10, zp))

# zp[which(zp > maxx)] = maxx

maxy = max(q)
miny = min(q)

if (mode==2) sub = setdiff(1:length(zp), fold) else sub = 1:length(zp)

# define limits of q by adding 20% either side
q_20 <- (max(q[sub]) - min(q[sub])) * 0.2

# define limits of p similar to james
lims=c(0, max(12, maxx), miny-q_20, maxy+q_20) # c(xl, xu, yl, yu)

kpq = kde2d(zp[sub], q[sub], n=c(res_p, res_q), lims=lims)

df <- data.frame(ZP = rep(kpq$x, times = res_q), Q = rep(kpq$y, each = res_p), z = as.vector(kpq$z)) ggplot(data = df, aes(x = ZP, y = z, group = Q, col = Q)) + geom_line(cex = 0.2) + theme_cowplot(12) + background_grid(major = "xy", minor = "none") + ylab("kpq$z") + ggtitle('KDE of P,Q')

# need Fdr rather than fdr to sum to get P(P\leq p, Q\leq q)
# int2_kpq=t(apply(apply(kpq$z[res:1,res:1],2,cumsum),1,cumsum))[res:1,res:1] # int2_kpq estimates P(ZP\geq zp, Q\leq q) int2_kpq = t(apply(apply(kpq$z[res_p:1,],2,cumsum),1,cumsum))[res_p:1,]
df <- data.frame(ZP = rep(kpq$x, times = res_q), Q = rep(kpq$y, each = res_p), z = as.vector(int2_kpq))
ggplot(data = df, aes(x = ZP, y = z, group = Q, col = Q)) + geom_line(cex = 0.2) + theme_cowplot(12) + background_grid(major = "xy", minor = "none") + ylab("kpq\$z") + ggtitle('"Integration" (summing) for P(ZP>=zp, Q<= q)')