PreMode / analysis /fig.sup.3.R
gzhong's picture
Upload folder using huggingface_hub
7718235 verified
# Setup for supplementary figure 3.
# The code further down calls ggplot(), plot_layout() and the `/` plot operator
# unqualified, so ggplot2 and patchwork must be attached before running this
# script; ggpp and ggeasy are reached through `::`.

# Genes whose assay tables are read, in plotting order.
genes <- c('PTEN', 'NUDT15', 'SNCA', 'CYP2C9', 'GCK', 'ASPA', 'CCR5', 'CXCR4')
# Parallel to `genes`: n such that column `score.n` is that gene's stability assay.
stab.assay <- c(1, 1, 2, 2, 2, 1, 1, 1)
# The two vectors are indexed together, so they must stay the same length.
stopifnot(length(genes) == length(stab.assay))

# Human-readable assay name for each `score.n` column, per gene (used as axis labels).
task.dic <- list(
  "PTEN"=c("score.1"="stability", "score.2"="enzyme.activity"),
  "NUDT15"=c("score.1"="stability", "score.2"="enzyme.activity"),
  "VKORC1"=c("score.1"="enzyme.activity", "score.2"="stability"),
  "CCR5"=c("score.1"="stability", "score.2"="binding Ab2D7", "score.3"="binding HIV-1"),
  "CXCR4"=c("score.1"="stability", "score.2"="binding CXCL12", "score.3"="binding Ab12G5"),
  "SNCA"=c("score.1"="enzyme.activity", "score.2"="stability"),
  "CYP2C9"=c("score.1"="enzyme.activity", "score.2"="stability"),
  "GCK"=c("score.1"="enzyme.activity", "score.2"="stability"),
  "ASPA"=c("score.1"="stability", "score.2"="enzyme.activity")
)

# Accumulators filled by the per-gene loop.
result <- NULL        # per-gene correlation summary, grown with rbind()
sp.stats <- NULL      # Spearman estimate for each plotted assay pair
pr.stats <- NULL      # Pearson estimate for each plotted assay pair
all.plots <- list()   # one ggplot object per plotted assay pair
k <- 1                # running index into sp.stats / pr.stats / all.plots
# Per-gene analysis: correlate the assay scores with predicted ddG values, then
# build one scatter plot for every (stability assay, other assay) pair.
# NOTE(review): the closing braces of this loop and the second argument of the
# rbind() call were missing from the source as received; the nesting below is
# reconstructed from the statement order and the summary-row columns are a
# best guess -- confirm against the original script.
for (i in seq_along(genes)) {
  assay <- read.csv(file.path('..', 'data.files', genes[i], 'ALL.annotated.csv'))

  # test the correlation between stab and foldx_ddG
  stab.score.columns <- paste0('score.', stab.assay[i])
  stab.corr <- abs(cor.test(assay$FoldXddG, assay[, stab.score.columns])$estimate)

  # Every other column whose name starts with 'score' is a non-stability assay.
  other.score.columns <- colnames(assay)[startsWith(colnames(assay), 'score')]
  other.score.columns <- other.score.columns[!other.score.columns %in% stab.score.columns]

  # Mean absolute correlation of the non-stability assays with predicted ddG.
  # NOTE(review): the stability assay above uses FoldXddG while these use
  # RosettaddG -- confirm the different predictor is intended.
  other.corr <- vapply(
    other.score.columns,
    function(score.col) abs(cor.test(assay$RosettaddG, assay[, score.col])$estimate),
    numeric(1)
  )
  other.corr <- mean(other.corr, na.rm = TRUE)

  result <- rbind(result,
                  data.frame(gene = genes[i],
                             stab.corr = unname(stab.corr),
                             other.corr = other.corr))

  # ASPA: the non-stability scores are sign-flipped before plotting and the
  # correlation label moves to the bottom-right corner; all other genes keep
  # their scores and get the label in the top-left corner.
  if (genes[i] == 'ASPA') {
    assay[, other.score.columns] <- -assay[, other.score.columns]
    x.pos <- 'right'
    y.pos <- 'bottom'
  } else {
    x.pos <- 'left'
    y.pos <- 'top'
  }

  # plot scatter plot of stability and other assay
  for (score.col in other.score.columns) {
    sp.stats[k] <- cor.test(assay[, stab.score.columns],
                            assay[, score.col], method = 'spearman')$estimate
    pr.stats[k] <- cor.test(assay[, stab.score.columns],
                            assay[, score.col], method = 'pearson')$estimate
    p <- ggplot(assay, aes_string(x=stab.score.columns, y=score.col)) +
      geom_point(alpha=0.2, color='grey') +
      geom_density_2d(color='gray1') +
      stat_smooth(method = "lm", formula = y~x, color='blue') +
      # Correlation label placed in plot-relative (npc) coordinates.
      ggpp::geom_text_npc(data=data.frame(x=x.pos, y=y.pos,
                                          label=paste0("Pearson r=", signif(pr.stats[k], digits = 2),
                                                       "\nSpearman rho=", signif(sp.stats[k], digits = 2))),
                          aes(npcx=x, npcy=y, label=label),
                          col='black') +
      ggtitle(genes[i]) +
      # Axis labels come from the per-gene assay-name lookup.
      xlab(task.dic[[genes[i]]][stab.score.columns]) +
      ylab(task.dic[[genes[i]]][score.col]) +
      theme_bw() + ggeasy::easy_center_title()
    all.plots[[k]] <- p
    k <- k + 1
  }
}
# Compose the ten panels into three rows (3 / 3 / 4 plots) and write the PDF.
top.row <- all.plots[[1]] + all.plots[[2]] + all.plots[[3]]
middle.row <- all.plots[[4]] + all.plots[[5]] + all.plots[[6]]
# The last row holds four panels, so it gets an explicit four-column layout.
bottom.row <- all.plots[[7]] + all.plots[[8]] + all.plots[[9]] + all.plots[[10]] +
  plot_layout(ncol = 4)
p <- top.row / middle.row / bottom.row
ggsave(filename = 'figs/fig.sup.3.pdf', plot = p, width = 10, height = 10)