|
|
|
|
|
|
|
|
|
|
|
|
|
# Reference scores from assay 1; the first CSV column supplies the row names.
ground.truth <- read.csv("../data.files/PTEN/assay.1.csv", row.names = 1)

# Accumulators for mean squared errors: eight initially empty (NULL) slots
# each, appended to by the replicate loop that follows.
all.premode <- vector("list", 8)
all.baseline <- vector("list", 8)

# Per-replicate summary data frames, indexed by replicate number.
all.replicates <- list()
|
# Evaluate replicate files 1..8. For every unique variant (aaChg) this
# collects:
#   base.line.1        - mean training score for the variant
#   base.line.2        - mean non-missing score for the variant in the
#                        replicate file
#   ground.truth       - score of the matching VarID in `ground.truth`
#   other.observations - number of scored rows in the replicate file
#   observations       - training rows + scored replicate rows
# Mean squared errors are appended to `all.premode` / `all.baseline`:
# slot 1 holds the value over all variants, slot k + 1 the value restricted
# to variants with other.observations == k.
for (r in 1:8) {
  replicate <- read.csv(paste0('PreMode/PTEN/replicate.', r, '.csv'))
  training <- read.csv(paste0('../data.files/PTEN.replicate.rest.',
                              r, '/training.csv'), row.names = 1)

  replicate$baseline <- NA
  replicate$observations <- NA
  # Keep the first row of each variant; summaries are filled in below.
  replicate.unique <- replicate[!duplicated(replicate$aaChg), ]
  n.variants <- nrow(replicate.unique)

  # Vectorised ground-truth lookup. match() gives NA for a variant missing
  # from `ground.truth` (a per-row `==` lookup aborts with "replacement has
  # length zero" in that case) and takes the first hit on duplicates.
  gt.idx <- match(replicate.unique$aaChg, ground.truth$VarID)
  if (anyNA(gt.idx)) {
    warning('replicate ', r, ': ', sum(is.na(gt.idx)),
            ' variant(s) have no ground-truth score', call. = FALSE)
  }

  replicate.unique$base.line.1 <- rep(NA_real_, n.variants)
  replicate.unique$base.line.2 <- rep(NA_real_, n.variants)
  replicate.unique$ground.truth <- ground.truth$score[gt.idx]
  replicate.unique$other.observations <- rep(NA_integer_, n.variants)

  # seq_len() keeps the loop empty when there are no variants (1:0 would not).
  for (i in seq_len(n.variants)) {
    variant <- replicate.unique$aaChg[i]
    training.rows <- training[training$aaChg == variant, ]
    replicate.rows <- replicate[replicate$aaChg == variant &
                                  !is.na(replicate$score), ]
    replicate.unique$base.line.1[i] <- mean(training.rows$score, na.rm = TRUE)
    replicate.unique$base.line.2[i] <- mean(replicate.rows$score, na.rm = TRUE)
    replicate.unique$observations[i] <- nrow(training.rows) + nrow(replicate.rows)
    replicate.unique$other.observations[i] <- nrow(replicate.rows)
  }

  # Overall errors use base.line.2 as the reference. They are kept in their
  # own variables because `premode` / `baseline` are reused per group below.
  overall.premode <- mean((replicate.unique$base.line.2 -
                             replicate.unique$logits)^2, na.rm = TRUE)
  overall.baseline <- mean((replicate.unique$base.line.1 -
                              replicate.unique$base.line.2)^2, na.rm = TRUE)
  all.replicates[[r]] <- replicate.unique
  all.premode[[1]] <- c(all.premode[[1]], overall.premode)
  all.baseline[[1]] <- c(all.baseline[[1]], overall.baseline)

  # Per-group errors use ground.truth as the reference.
  # NOTE(review): if other.observations can be 0, slot i + 1 is slot 1 and
  # the group value is mixed into the "all" accumulator - confirm the data
  # never produces a zero count.
  n.other <- replicate.unique$other.observations
  for (i in min(n.other, na.rm = TRUE):max(n.other, na.rm = TRUE)) {
    in.group <- n.other == i
    premode <- mean((replicate.unique$ground.truth[in.group] -
                       replicate.unique$logits[in.group])^2, na.rm = TRUE)
    baseline <- mean((replicate.unique$base.line.1[in.group] -
                        replicate.unique$ground.truth[in.group])^2, na.rm = TRUE)
    all.premode[[i + 1]] <- c(all.premode[[i + 1]], premode)
    all.baseline[[i + 1]] <- c(all.baseline[[i + 1]], baseline)
  }

  # Report only this replicate's overall values; printing the accumulated
  # vector would label earlier replicates' numbers with the current r.
  print(paste0('replicate ', r, ', PreMode: ', overall.premode,
               ', Baseline: ', overall.baseline))
}
|
# Variant counts per other.observations value, taken from replicate 1, with
# the total prepended as the 'all' group.
npoints <- table(all.replicates[[1]]$other.observations)
npoints <- c(sum(npoints), npoints)
names(npoints)[1] <- 'all'

# Long-format table of RMSE values: one row per (model, group, experiment).
# Sizes come from the data instead of a hard-coded 8 so that labels cannot
# be silently recycled against a different number of groups or experiments.
n.exp <- length(all.replicates)
n.groups <- length(npoints)
rmse <- sqrt(c(unlist(all.premode), unlist(all.baseline)))
if (length(rmse) != 2 * n.groups * n.exp) {
  stop('RMSE vector has ', length(rmse), ' values but ', n.groups,
       ' groups x ', n.exp, ' experiments x 2 models were expected',
       call. = FALSE)
}
to.plot <- data.frame(
  RMSE = rmse,
  exp = rep(rep(seq_len(n.exp), n.groups), 2),
  replicate = paste0(rep(rep(names(npoints), each = n.exp), 2), " : ",
                     rep(rep(npoints, each = n.exp), 2)),
  model = c(rep("PreMode", n.groups * n.exp),
            rep("Experiment", n.groups * n.exp))
)
|
library(ggplot2) |
|
|
|
|
|
library(ggpubr) |
|
# Build two diagnostic plots per replicate:
#   diff.plots[[r]]   - scatter of (base.line.1 - logits) against
#                       (base.line.1 - ground.truth) with a linear fit and
#                       its equation, coloured by `observations`
#   diff.plots.2[[r]] - violin/box plot of (base.line.1 - ground.truth)
#                       grouped by whether base.line.1 lies at least half the
#                       logits range above, below, or near the logits value
# The derived columns are written back to all.replicates[[r]], and the
# correlation estimate between the two differences is printed.
diff.plots <- list()
diff.plots.2 <- list()
bin.levels <- c('Measurement\n< PreMode', 'Measurement\n~ PreMode',
                'Measurement\n> PreMode')
# seq_along() iterates zero times on an empty list (1:length() would not).
for (r in seq_along(all.replicates)) {
  rep.df <- all.replicates[[r]]
  rep.df$Experiment.PreMode.diff <- rep.df$base.line.1 - rep.df$logits
  rep.df$Experiment.Groundtruth.diff <- rep.df$base.line.1 - rep.df$ground.truth

  diff.plots[[r]] <- ggplot(rep.df, aes(x = Experiment.PreMode.diff,
                                        y = Experiment.Groundtruth.diff,
                                        col = observations)) +
    geom_smooth(method = 'lm', formula = y ~ x) +
    stat_regline_equation(
      aes(label = paste(after_stat(eq.label), after_stat(adj.rr.label),
                        sep = "~~~~")),
      formula = y ~ x
    ) +
    geom_point(alpha = 0.3) +
    xlab('Measurement - PreMode') +
    ylab('Measurement - Groundtruth') +
    scale_color_gradientn(colours = c("red", "white", "blue")) +
    ggtitle(paste0("Train on Experiment ", r)) +
    theme_bw() +
    ggeasy::easy_center_title()

  # Bin threshold: half the span of the logits values in this replicate.
  scl <- max(rep.df$logits, na.rm = TRUE) - min(rep.df$logits, na.rm = TRUE)

  # Rows with a missing difference stay in the middle bin.
  diff.bin <- rep('Measurement\n~ PreMode', nrow(rep.df))
  diff.bin[which(rep.df$Experiment.PreMode.diff >= scl / 2)] <- 'Measurement\n> PreMode'
  diff.bin[which(rep.df$Experiment.PreMode.diff <= -scl / 2)] <- 'Measurement\n< PreMode'
  rep.df$Experiment.PreMode.diff.bin <- factor(diff.bin, levels = bin.levels)

  diff.plots.2[[r]] <- ggplot(rep.df, aes(x = Experiment.PreMode.diff.bin,
                                          y = Experiment.Groundtruth.diff,
                                          col = Experiment.PreMode.diff.bin)) +
    geom_violin() +
    geom_boxplot(width = 0.2) +
    ggtitle(paste0("Train on Experiment ", r)) +
    labs(col = 'Variant Groups') +
    xlab('Measurement - PreMode') +
    ylab('Measurement - Groundtruth') +
    theme_bw() +
    ggeasy::easy_center_title()

  # Persist the derived columns for any later use of all.replicates.
  all.replicates[[r]] <- rep.df

  print(cor.test(rep.df$Experiment.PreMode.diff,
                 rep.df$Experiment.Groundtruth.diff)$estimate)
}
|
|
|
library(patchwork) |
|
# Arrange the per-replicate panels four per row. wrap_plots() takes the
# whole list, so the layout no longer assumes exactly eight panels.
p4 <- patchwork::wrap_plots(diff.plots, ncol = 4)
p5 <- patchwork::wrap_plots(diff.plots.2, ncol = 4)

# Make sure the output directory exists before writing the PDFs.
dir.create('figs', showWarnings = FALSE, recursive = TRUE)
ggsave(filename = 'figs/fig.sup.5a.pdf', plot = p4, width = 20, height = 7.5)
ggsave(filename = 'figs/fig.sup.5b.pdf', plot = p5, width = 20, height = 7.5)
|
|
|
|