PreMode / analysis /AUROC.R
gzhong's picture
Upload folder using huggingface_hub
7718235 verified
# Compute a ROC curve with PRROC::roc.curve() for two-class labels and pick
# the threshold that maximises Youden's J (curve column 2 minus column 1).
#
# Args:
#   scores:   label vector. The negative label is -1 when -1 is present,
#             otherwise 0 when 0 is present, otherwise none. The positive
#             label is 3 when 3 is present and 1 is not, otherwise 1.
#             Entries carrying any other label are dropped.
#   logits:   predictions aligned with `scores`.
#   filename: when not NA, the ROC curve is plotted into this PDF file.
#   rev.ok:   when TRUE and the AUC is below 0.5, the curve is recomputed
#             with the two classes swapped.
#
# Returns:
#   The roc.curve() result with an extra `cutoff` element. When no curve can
#   be computed (nothing left after filtering, a single class, or constant
#   logits) a list with auc = NA, cutoff = NA and a 1 x 3 `curve` of -Inf.
#
# Side effects: attaches PRROC, prints the chosen cutoff, optionally writes
# a PDF.
plot.AUC <- function(scores, logits, filename = NA, rev.ok = FALSE) {
  library(PRROC)
  all.scores <- unique(scores)
  if (-1 %in% all.scores) {
    neg.score <- -1
  } else if (0 %in% all.scores) {
    neg.score <- 0
  } else {
    neg.score <- NA
  }
  if (3 %in% all.scores && !(1 %in% all.scores)) {
    pos.score <- 3
  } else {
    pos.score <- 1
  }

  # Drop missing pairs first, then keep only the two labels of interest.
  complete <- !(is.na(scores) | is.na(logits))
  scores <- scores[complete]
  logits <- logits[complete]
  wanted <- scores %in% c(neg.score, pos.score)
  scores <- scores[wanted]
  logits <- logits[wanted]

  # Every "no curve possible" case returns the same placeholder shape so
  # callers can always read $auc, $cutoff and $curve.
  if (length(logits) == 0 ||
      length(unique(logits)) == 1 ||
      length(unique(scores)) == 1) {
    return(list(auc = NA,
                cutoff = NA,
                curve = matrix(-Inf, nrow = 1, ncol = 3)))
  }

  # NOTE(review): PRROC documents `scores.class0` as the positive-class
  # scores; the negative-label logits are passed there, as in the original
  # code. Confirm this orientation is intended.
  roc <- roc.curve(scores.class0 = logits[scores == neg.score],
                   scores.class1 = logits[scores == pos.score],
                   curve = TRUE)
  if (rev.ok && isTRUE(roc$auc < 0.5)) {
    roc <- roc.curve(scores.class0 = logits[scores == pos.score],
                     scores.class1 = logits[scores == neg.score],
                     curve = TRUE)
  }

  if (!is.na(filename)) {
    pdf(filename)
    # Close the device even when plotting fails, so it is never left open.
    tryCatch(plot(roc), finally = dev.off())
  }

  j.stat <- roc$curve[, 2] - roc$curve[, 1]
  # which.max() returns the first maximum, matching which(x == max(x))[1].
  optimal.cutoff <- roc$curve[which.max(j.stat), 3]
  roc$cutoff <- optimal.cutoff
  print(paste0("optimal cutoff = ", optimal.cutoff))
  roc
}
# Compute a precision-recall curve with PRROC::pr.curve() for two-class
# labels and pick the threshold with the highest F1 along the curve.
#
# Args:
#   scores:   label vector. The negative label is -1 when -1 is present,
#             otherwise 0 when 0 is present, otherwise none; the positive
#             label is 1. Entries carrying any other label are dropped.
#   logits:   predictions aligned with `scores`.
#   filename: when not NA, the PR curve is plotted into this PDF file.
#
# Returns:
#   The pr.curve() result with extra `cutoff` and `auc` (= auc.integral)
#   elements. When no curve can be computed (nothing left after filtering,
#   a single class, or constant logits) a list with auc = NA, cutoff = NA
#   and a 1 x 3 `curve` of -Inf.
#
# Side effects: attaches PRROC, prints the chosen cutoff, optionally writes
# a PDF.
plot.PR <- function(scores, logits, filename = NA) {
  library(PRROC)
  all.scores <- unique(scores)
  # Fall back to NA so that data without a 0 / -1 label reaches the
  # placeholder branch below instead of failing on an undefined variable.
  if (-1 %in% all.scores) {
    neg.score <- -1
  } else if (0 %in% all.scores) {
    neg.score <- 0
  } else {
    neg.score <- NA
  }

  # Drop missing pairs first, then keep only the two labels of interest.
  complete <- !(is.na(scores) | is.na(logits))
  scores <- scores[complete]
  logits <- logits[complete]
  wanted <- scores %in% c(neg.score, 1)
  scores <- scores[wanted]
  logits <- logits[wanted]

  # Same degenerate-input guard as plot.AUC: pr.curve() needs both classes
  # and at least two distinct prediction values.
  if (length(logits) == 0 ||
      length(unique(logits)) == 1 ||
      length(unique(scores)) == 1) {
    return(list(auc = NA,
                cutoff = NA,
                curve = matrix(-Inf, nrow = 1, ncol = 3)))
  }

  # NOTE(review): PRROC documents `scores.class0` as the positive-class
  # scores; the negative-label logits are passed there, as in the original
  # code. Confirm this orientation is intended.
  pr <- pr.curve(scores.class0 = logits[scores == neg.score],
                 scores.class1 = logits[scores == 1],
                 curve = TRUE)
  # The classes are swapped whenever the integral AUC is not above 0.5.
  if (isTRUE(pr$auc.integral <= 0.5)) {
    pr <- pr.curve(scores.class0 = logits[scores == 1],
                   scores.class1 = logits[scores == neg.score],
                   curve = TRUE)
  }

  if (!is.na(filename)) {
    pdf(filename)
    # Close the device even when plotting fails, so it is never left open.
    tryCatch(plot(pr), finally = dev.off())
  }

  f1 <- 2 * pr$curve[, 2] * pr$curve[, 1] / (pr$curve[, 2] + pr$curve[, 1])
  # which.max() skips the NaN produced where both columns are 0 and returns
  # the first maximum otherwise.
  best <- which.max(f1)
  optimal.cutoff <- if (length(best) == 0) NA else pr$curve[best, 3]
  pr$cutoff <- optimal.cutoff
  pr$auc <- pr$auc.integral
  print(paste0("optimal cutoff = ", optimal.cutoff))
  pr
}
# Compute one ROC AUC and Youden-J cutoff per uniprotID with
# PRROC::roc.curve().
#
# Args:
#   scores:     label vector. The negative label is -1 when -1 is present,
#               otherwise 0 when 0 is present, otherwise none; the positive
#               label is 1.
#   logits:     predictions aligned with `scores`.
#   uniprotIDs: group identifier aligned with `scores`.
#
# Returns:
#   A data.frame with one row per distinct non-missing uniprotID and the
#   columns uniprotID, optimal.cutoff and auc. IDs lacking either class keep
#   NA in both value columns. Zero rows when nothing survives NA filtering.
#
# Side effects: attaches PRROC and prints one status line per ID.
plot.AUC.by.uniprotID <- function(scores, logits, uniprotIDs) {
  library(PRROC)
  all.scores <- unique(scores)
  # Fall back to NA so that data without a 0 / -1 label is reported as
  # "not enough data points" instead of failing on an undefined variable.
  if (-1 %in% all.scores) {
    neg.score <- -1
  } else if (0 %in% all.scores) {
    neg.score <- 0
  } else {
    neg.score <- NA
  }

  # Keep only rows where label, prediction and ID are all present.
  complete <- !(is.na(scores) | is.na(logits) | is.na(uniprotIDs))
  scores <- scores[complete]
  logits <- logits[complete]
  uniprotIDs <- uniprotIDs[complete]

  unique.uniprotIDs <- unique(uniprotIDs)
  n.ids <- length(unique.uniprotIDs)
  # Columns are sized explicitly so that zero IDs yields a zero-row frame.
  result <- data.frame(uniprotID = unique.uniprotIDs,
                       optimal.cutoff = rep(NA, n.ids),
                       auc = rep(NA, n.ids))

  if (n.ids == 1) {
    # NOTE(review): with a single ID no curve is computed and auc is fixed
    # at 0.5, as in the original code. Confirm this is intended.
    result$auc <- 0.5
    return(result)
  }

  for (i in seq_len(n.ids)) {
    in.id <- uniprotIDs == unique.uniprotIDs[i]
    # %in% never yields NA, so an NA neg.score simply selects nothing.
    neg.logits <- logits[in.id & scores %in% neg.score]
    pos.logits <- logits[in.id & scores == 1]
    if (length(neg.logits) > 0 && length(pos.logits) > 0) {
      res <- roc.curve(scores.class0 = neg.logits,
                       scores.class1 = pos.logits,
                       curve = TRUE)
      # Classes are swapped whenever the AUC falls below 0.5.
      if (res$auc < 0.5) {
        res <- roc.curve(scores.class0 = pos.logits,
                         scores.class1 = neg.logits,
                         curve = TRUE)
      }
      j.stat <- res$curve[, 2] - res$curve[, 1]
      optimal.cutoff <- res$curve[which.max(j.stat), 3]
      result$optimal.cutoff[i] <- optimal.cutoff
      result$auc[i] <- res$auc
      print(paste0("optimal cutoff for ID ", unique.uniprotIDs[i], " = ", optimal.cutoff))
    } else {
      print(paste0("Not enough data points to calculate the auc for ", unique.uniprotIDs[i]))
    }
  }
  result
}
# Collect measured vs. predicted values per assay, optionally save a scatter
# plot with a fitted regression line, and compute one agreement metric per
# assay.
#
# Args:
#   scores:   measurements; a plain vector (one assay) or a matrix /
#             data.frame with one column per assay.
#   logits:   predictions with the same shape as `scores`.
#   bin:      when TRUE the metric is a ROC AUC from PRROC::roc.curve() using
#             labels 0 and 1, with the classes swapped when the AUC is below
#             0.5; when FALSE it is the Spearman correlation from cor.test().
#   filename: when not NA, the scatter plot is saved there with ggsave().
#
# Returns (invisibly):
#   list(to.plot = <data.frame with measurement, predicted, label>,
#        R2 = <one value per assay>). A value is NA when, after dropping
#   missing pairs, an assay has at most two points (bin = FALSE) or fewer
#   than two distinct labels or predictions (bin = TRUE).
#
# Side effects: attaches ggplot2 and ggpubr (and PRROC when bin is TRUE).
plot.R2 <- function(scores, logits, bin = FALSE, filename = NA) {
  library(ggplot2)
  library(ggpubr)
  single.assay <- is.null(dim(scores))
  n.assays <- if (single.assay) 1 else dim(scores)[2]

  # Long-format table of one assay; built before any NA filtering.
  assay.frame <- function(measured, predicted, i) {
    data.frame(measurement = measured,
               predicted = predicted,
               label = rep(paste0("assay.", i), length(measured)))
  }

  # Metric for one assay after dropping pairs with a missing value.
  assay.metric <- function(measured, predicted) {
    complete <- !(is.na(predicted) | is.na(measured))
    measured <- measured[complete]
    predicted <- predicted[complete]
    if (bin) {
      library(PRROC)
      # The degenerate check must look at this assay only, not at the whole
      # input matrix.
      if (length(unique(predicted)) > 1 && length(unique(measured)) > 1) {
        auc <- roc.curve(scores.class0 = predicted[measured == 0],
                         scores.class1 = predicted[measured == 1])$auc
        if (auc < 0.5) {
          auc <- roc.curve(scores.class0 = predicted[measured == 1],
                           scores.class1 = predicted[measured == 0])$auc
        }
        auc
      } else {
        NA_real_
      }
    } else if (length(predicted) > 2) {
      unname(cor.test(measured, predicted, method = 'spearman')$estimate)
    } else {
      NA_real_
    }
  }

  if (single.assay) {
    to.plot <- assay.frame(scores, logits, 1)
  } else {
    frames <- lapply(seq_len(n.assays), function(i) {
      assay.frame(scores[, i], logits[, i], i)
    })
    to.plot <- if (length(frames) > 0) do.call(rbind, frames) else data.frame()
  }

  if (!is.na(filename)) {
    p <- ggplot(to.plot, aes(x=measurement, y=predicted, col=label)) +
      geom_point(alpha=0.5) +
      stat_smooth(method = "lm", formula = y~x) +
      stat_regline_equation(
        aes(label = paste(..eq.label.., ..adj.rr.label.., sep = "~~~~")),
        formula = y~x
      ) + theme_bw()
    ggsave(filename, plot=p, height=6, width=8)
  }

  if (single.assay) {
    R2 <- assay.metric(scores, logits)
  } else {
    R2 <- vapply(seq_len(n.assays), function(i) {
      assay.metric(scores[, i], logits[, i])
    }, numeric(1))
  }

  # The original ended on an assignment, so the result was returned
  # invisibly; keep that.
  invisible(list(to.plot = to.plot,
                 R2 = R2))
}
# Softmax over all elements of `logits`: exp(x) / sum(exp(x)).
#
# The maximum is subtracted before exponentiating. This leaves the result
# unchanged mathematically but keeps exp() from overflowing to Inf (and the
# ratio from becoming NaN) for large inputs.
#
# Args:
#   logits: numeric vector (or array; normalisation is over every element).
#
# Returns:
#   Numeric object of the same shape as `logits`, summing to 1. Empty input
#   gives empty output; any NA in the input makes every element NA.
soft.max <- function(logits) {
  if (length(logits) == 0) {
    # max() of an empty vector would warn and return -Inf.
    return(exp(logits))
  }
  shifted <- exp(logits - max(logits))
  shifted / sum(shifted)
}