|
#' ROC AUC and Youden-optimal cutoff for binary-labelled predictions.
#'
#' The two class labels are detected from `scores`: the negative label is -1
#' when -1 occurs, otherwise 0 when 0 occurs (otherwise none); the positive
#' label is 3 when 3 occurs and 1 does not, otherwise 1. Entries that are NA in
#' either input, or whose label is neither detected value, are dropped.
#'
#' @param scores Vector of class labels.
#' @param logits Vector of predicted values, parallel to `scores`.
#' @param filename Path of a PDF to draw the curve into; `NA` skips plotting.
#' @param rev.ok If `TRUE` and the first AUC is below 0.5, the curve is
#'   recomputed with the two classes swapped.
#' @return The object returned by `PRROC::roc.curve` with an extra `cutoff`
#'   element, or, when no curve can be computed (no usable data, a single
#'   class, or constant predictions),
#'   `list(auc = NA, cutoff = NA, curve = <1 x 3 matrix of -Inf>)`.
plot.AUC <- function(scores, logits, filename = NA, rev.ok = FALSE) {
  all.scores <- unique(scores)
  if (-1 %in% all.scores) {
    neg.score <- -1
  } else if (0 %in% all.scores) {
    neg.score <- 0
  } else {
    neg.score <- NA
  }
  pos.score <- if (3 %in% all.scores && !(1 %in% all.scores)) 3 else 1

  # Drop missing pairs first, then anything that is not one of the two labels.
  keep <- !(is.na(scores) | is.na(logits))
  scores <- scores[keep]
  logits <- logits[keep]
  keep <- scores %in% c(neg.score, pos.score)
  scores <- scores[keep]
  logits <- logits[keep]

  # Every "nothing to compute" situation returns the same shape, so callers
  # can always index `$curve`.
  if (length(logits) == 0 ||
      length(unique(logits)) == 1 ||
      length(unique(scores)) == 1) {
    return(list(auc = NA,
                cutoff = NA,
                curve = matrix(-Inf, nrow = 1, ncol = 3)))
  }

  # Attached only once a curve is really needed.
  library(PRROC)

  neg.logits <- logits[scores == neg.score]
  pos.logits <- logits[scores == pos.score]
  PRROC.dnv.df <- roc.curve(scores.class0 = neg.logits,
                            scores.class1 = pos.logits,
                            curve = TRUE)
  if (rev.ok && PRROC.dnv.df$auc < 0.5) {
    PRROC.dnv.df <- roc.curve(scores.class0 = pos.logits,
                              scores.class1 = neg.logits,
                              curve = TRUE)
  }

  if (!is.na(filename)) {
    pdf(filename)
    # Close the device even if plotting fails, so it is never left open.
    tryCatch(plot(PRROC.dnv.df), finally = dev.off())
  }

  # Column 2 minus column 1 of the curve matrix (sensitivity minus FPR per the
  # PRROC documentation); the first row maximising it supplies the cutoff
  # stored in column 3.
  J_stats <- PRROC.dnv.df$curve[, 2] - PRROC.dnv.df$curve[, 1]
  optimal.cutoff <- PRROC.dnv.df$curve[which.max(J_stats), 3]
  PRROC.dnv.df$cutoff <- optimal.cutoff
  print(paste0("optimal cutoff = ", optimal.cutoff))
  PRROC.dnv.df
}
|
|
|
|
|
#' Precision-recall AUC and F1-optimal cutoff for binary-labelled predictions.
#'
#' The negative label is -1 when -1 occurs in `scores`, otherwise 0 when 0
#' occurs (otherwise none); the positive label is always 1. Entries that are NA
#' in either input, or whose label is neither of those, are dropped.
#'
#' @param scores Vector of class labels.
#' @param logits Vector of predicted values, parallel to `scores`.
#' @param filename Path of a PDF to draw the curve into; `NA` skips plotting.
#' @return The object returned by `PRROC::pr.curve` with extra `cutoff` and
#'   `auc` (copy of `auc.integral`) elements, or, when no curve can be computed
#'   (no usable data, a single class, or constant predictions),
#'   `list(auc = NA, cutoff = NA, curve = <1 x 3 matrix of -Inf>)`.
plot.PR <- function(scores, logits, filename = NA) {
  all.scores <- unique(scores)
  if (-1 %in% all.scores) {
    neg.score <- -1
  } else if (0 %in% all.scores) {
    neg.score <- 0
  } else {
    # No negative label present: leave it NA so the filter below keeps only
    # positives and the degenerate-input guard takes over.
    neg.score <- NA
  }

  keep <- !(is.na(scores) | is.na(logits))
  scores <- scores[keep]
  logits <- logits[keep]
  keep <- scores %in% c(neg.score, 1)
  scores <- scores[keep]
  logits <- logits[keep]

  if (length(logits) == 0 ||
      length(unique(logits)) == 1 ||
      length(unique(scores)) == 1) {
    return(list(auc = NA,
                cutoff = NA,
                curve = matrix(-Inf, nrow = 1, ncol = 3)))
  }

  # Attached only once a curve is really needed.
  library(PRROC)

  neg.logits <- logits[scores == neg.score]
  pos.logits <- logits[scores == 1]
  PRROC.dnv.df <- pr.curve(scores.class0 = neg.logits,
                           scores.class1 = pos.logits,
                           curve = TRUE)
  # NOTE(review): the classes are swapped whenever the PR AUC is <= 0.5; a PR
  # baseline is normally the class prevalence rather than 0.5 - confirm this
  # threshold is intended.
  if (PRROC.dnv.df$auc.integral <= 0.5) {
    PRROC.dnv.df <- pr.curve(scores.class0 = pos.logits,
                             scores.class1 = neg.logits,
                             curve = TRUE)
  }

  if (!is.na(filename)) {
    pdf(filename)
    # Close the device even if plotting fails, so it is never left open.
    tryCatch(plot(PRROC.dnv.df), finally = dev.off())
  }

  # F1 from curve columns 1 and 2 (recall and precision per the PRROC
  # documentation). Rows where both are 0 give NaN; which.max() skips them
  # instead of letting max() turn the whole result into NaN.
  first.col <- PRROC.dnv.df$curve[, 1]
  second.col <- PRROC.dnv.df$curve[, 2]
  F1 <- 2 * second.col * first.col / (second.col + first.col)
  best <- which.max(F1)
  optimal.cutoff <- if (length(best) > 0) PRROC.dnv.df$curve[best, 3] else NA
  PRROC.dnv.df$cutoff <- optimal.cutoff
  PRROC.dnv.df$auc <- PRROC.dnv.df$auc.integral
  print(paste0("optimal cutoff = ", optimal.cutoff))
  PRROC.dnv.df
}
|
|
|
|
|
#' Per-ID ROC AUC and Youden-optimal cutoff.
#'
#' The negative label is -1 when -1 occurs in `scores`, otherwise 0 when 0
#' occurs (otherwise none); the positive label is always 1. Rows with an NA in
#' any of the three inputs are dropped. For every remaining ID that has at
#' least one negative and one positive row, a ROC curve is computed (and
#' recomputed with the classes swapped when its AUC is below 0.5).
#'
#' @param scores Vector of class labels.
#' @param logits Vector of predicted values, parallel to `scores`.
#' @param uniprotIDs Vector of group identifiers, parallel to `scores`.
#' @return A data frame with columns `uniprotID`, `optimal.cutoff` and `auc`,
#'   one row per distinct ID; IDs without both classes keep NA in the last two
#'   columns.
plot.AUC.by.uniprotID <- function(scores, logits, uniprotIDs) {
  all.scores <- unique(scores)
  if (-1 %in% all.scores) {
    neg.score <- -1
  } else if (0 %in% all.scores) {
    neg.score <- 0
  } else {
    # No negative label at all: every ID ends up in the "not enough data" path.
    neg.score <- NA
  }

  keep <- !(is.na(scores) | is.na(logits) | is.na(uniprotIDs))
  scores <- scores[keep]
  logits <- logits[keep]
  uniprotIDs <- uniprotIDs[keep]
  unique.uniprotIDs <- unique(uniprotIDs)
  n.ids <- length(unique.uniprotIDs)

  if (n.ids == 1) {
    # NOTE(review): a single ID is reported as auc = 0.5 without computing a
    # curve; kept as in the original - confirm this shortcut is intended.
    return(data.frame(uniprotID = unique.uniprotIDs,
                      optimal.cutoff = NA_real_,
                      auc = 0.5))
  }

  # rep() keeps the columns aligned even when no ID survives the NA filter.
  PRROC.dnv.df <- data.frame(uniprotID = unique.uniprotIDs,
                             optimal.cutoff = rep(NA_real_, n.ids),
                             auc = rep(NA_real_, n.ids))

  # `scores` holds no NA here, so %in% against an NA label is FALSE everywhere.
  is.neg <- scores %in% neg.score
  is.pos <- scores == 1

  # Attached only when at least one ID can possibly have both classes.
  if (any(is.neg) && any(is.pos)) {
    library(PRROC)
  }

  for (i in seq_len(n.ids)) {
    in.id <- uniprotIDs == unique.uniprotIDs[i]
    neg.logits <- logits[in.id & is.neg]
    pos.logits <- logits[in.id & is.pos]
    if (length(neg.logits) > 0 && length(pos.logits) > 0) {
      res <- roc.curve(scores.class0 = neg.logits,
                       scores.class1 = pos.logits,
                       curve = TRUE)
      if (res$auc < 0.5) {
        res <- roc.curve(scores.class0 = pos.logits,
                         scores.class1 = neg.logits,
                         curve = TRUE)
      }
      # Column 2 minus column 1 of the curve matrix; the first maximising row
      # supplies the cutoff stored in column 3.
      J_stats <- res$curve[, 2] - res$curve[, 1]
      optimal.cutoff <- res$curve[which.max(J_stats), 3]
      PRROC.dnv.df$optimal.cutoff[i] <- optimal.cutoff
      PRROC.dnv.df$auc[i] <- res$auc
      print(paste0("optimal cutoff for ID ", unique.uniprotIDs[i], " = ", optimal.cutoff))
    } else {
      print(paste0("Not enough data points to calculate the auc for ", unique.uniprotIDs[i]))
    }
  }
  PRROC.dnv.df
}
|
|
|
|
|
#' Per-assay agreement between measurements and predictions.
#'
#' Despite the name, the reported metric is Spearman's rho (`bin = FALSE`) or
#' a ROC AUC over labels 0 and 1 (`bin = TRUE`), not a coefficient of
#' determination. `scores` may be a vector (one assay) or a two-dimensional
#' object with one column per assay; `logits` must have the matching layout.
#'
#' @param scores Measured values, vector or matrix-like with assays in columns.
#' @param logits Predicted values, same layout as `scores`.
#' @param bin If `TRUE`, compute a ROC AUC per assay instead of Spearman's rho.
#' @param filename Path for a scatter plot written with `ggsave`; `NA` skips
#'   plotting (and then ggplot2/ggpubr are not needed).
#' @return Invisibly, a list with `to.plot` (long data frame with columns
#'   `measurement`, `predicted`, `label`) and `R2` (one value per assay, NA
#'   when the metric cannot be computed).
plot.R2 <- function(scores, logits, bin = FALSE, filename = NA) {
  single.assay <- is.null(dim(scores))
  assays <- if (single.assay) 1L else dim(scores)[2]

  # Accessors hiding the vector-vs-matrix distinction.
  assay.scores <- function(i) if (single.assay) scores else scores[, i]
  assay.logits <- function(i) if (single.assay) logits else logits[, i]

  # Metric for one assay, computed on that assay's complete pairs only.
  assay.metric <- function(s, l) {
    ok <- !(is.na(l) | is.na(s))
    s <- s[ok]
    l <- l[ok]
    if (bin) {
      # Degeneracy is judged on this assay alone, and both labels must exist.
      usable <- length(unique(l)) > 1 && length(unique(s)) > 1 &&
        any(s == 0) && any(s == 1)
      if (!usable) {
        return(NA_real_)
      }
      library(PRROC)
      auc <- roc.curve(scores.class0 = l[s == 0],
                       scores.class1 = l[s == 1])$auc
      if (auc < 0.5) {
        auc <- roc.curve(scores.class0 = l[s == 1],
                         scores.class1 = l[s == 0])$auc
      }
      return(as.numeric(auc))
    }
    if (length(s) > 2) {
      as.numeric(cor.test(s, l, method = "spearman")$estimate)
    } else {
      NA_real_
    }
  }

  # Long-format table of the raw (unfiltered) values, one label per assay.
  frames <- lapply(seq_len(assays), function(i) {
    data.frame(measurement = assay.scores(i),
               predicted = assay.logits(i),
               label = paste0("assay.", i))
  })
  to.plot <- do.call(rbind, c(list(data.frame()), frames))

  if (!is.na(filename)) {
    # Plotting packages are attached only when a plot is requested.
    library(ggplot2)
    library(ggpubr)
    p <- ggplot(to.plot, aes(x = measurement, y = predicted, col = label)) +
      geom_point(alpha = 0.5) +
      stat_smooth(method = "lm", formula = y ~ x) +
      stat_regline_equation(
        aes(label = paste(..eq.label.., ..adj.rr.label.., sep = "~~~~")),
        formula = y ~ x
      ) + theme_bw()
    ggsave(filename, plot = p, height = 6, width = 8)
  }

  R2 <- vapply(seq_len(assays),
               function(i) assay.metric(assay.scores(i), assay.logits(i)),
               numeric(1))

  # The original ended on an assignment, i.e. returned invisibly; keep that.
  invisible(list(to.plot = to.plot,
                 R2 = R2))
}
|
|
|
|
|
#' Softmax of a set of logits.
#'
#' Computes `exp(logits) / sum(exp(logits))` over all elements of `logits`,
#' after subtracting the largest finite-or-not maximum when it is finite. The
#' shift cancels in the ratio but keeps `exp()` from overflowing to `Inf`
#' (which would give `NaN`) or underflowing every term to 0.
#'
#' @param logits Numeric values.
#' @return Values of the same shape as `logits`, summing to 1 for finite input.
soft.max <- function(logits) {
  shift <- 0
  if (length(logits) > 0 && !all(is.na(logits))) {
    peak <- max(logits, na.rm = TRUE)
    # A non-finite maximum cannot be subtracted meaningfully; fall back to the
    # unshifted formula in that case.
    if (is.finite(peak)) {
      shift <- peak
    }
  }
  res <- exp(logits - shift)
  res / sum(res)
}
|
|