|
import numpy as np |
|
import pandas as pd |
|
|
|
import scipy.stats as stats |
|
import statsmodels.api as sm |
|
from statsmodels.formula.api import ols |
|
from statsmodels.regression.linear_model import RegressionResultsWrapper |
|
from statsmodels.stats.multicomp import pairwise_tukeyhsd |
|
|
|
from matplotlib.figure import Figure |
|
import seaborn as sns |
|
import panel as pn |
|
|
|
import com_const as cc |
|
import com_func as cf |
|
import com_image as ci |
|
|
|
# Significance thresholds expressed as -log(p) for p = 0.05, 0.01, 0.001 and
# 0.0001; df_tukey_cmp_plot uses them as the y positions of the
# "*", "**", "***" and "****" tick labels and guide lines.
stars = [-np.log(p_threshold) for p_threshold in (0.05, 0.01, 0.001, 0.0001)]
|
|
|
|
|
def plot_single_progression(
    ax,
    df,
    target,
    title: str,
    hue="gen",
    style="gen",
    show_legend: bool = False,
):
    """Draw a seaborn line plot of ``target`` against the ``dpi`` column on ``ax``.

    Args:
        ax: Matplotlib axes to draw on.
        df: Data frame holding the ``dpi`` column, ``target`` and the columns
            named by ``hue`` and ``style``.
        target: Name of the column plotted on the y axis.
        title: Title set on ``ax``.
        hue: Column used for line colours; the frame is sorted by it first.
        style: Column used for marker styles.
        show_legend: When truthy the legend is moved outside the axes
            (anchored at the upper-right corner); otherwise it is hidden.
    """
    lp = sns.lineplot(
        df.sort_values(hue),
        x="dpi",
        y=target,
        hue=hue,
        markers=True,
        style=style,
        dashes=False,
        palette="tab10",
        markersize=12,
        ax=ax,
    )
    # NOTE(review): the labels are applied to whatever tick positions
    # matplotlib picked; this presumably assumes exactly eight y ticks.
    # Confirm, or pin the positions with set_yticks.
    lp.set_yticklabels(["", "3", "", "5", "", "7", "", "9"])
    ax.set_title(title)

    legend = ax.get_legend()
    if legend is None:
        # No legend was created (e.g. nothing was drawn), so there is nothing
        # to move or hide. The original code raised AttributeError here.
        return

    if show_legend:
        sns.move_legend(ax, "upper left", bbox_to_anchor=(1, 1))
    else:
        legend.set_visible(False)
|
|
|
|
|
def get_model(
    df: pd.DataFrame, target: str, formula: str, dpi: int = None
) -> RegressionResultsWrapper:
    """Fit an OLS model for ``target`` and return the fitted results.

    The model formula is ``target`` followed by a space and ``formula``
    (so ``formula`` presumably starts with ``~``; verify against callers).
    When ``dpi`` is given, only the rows whose ``dpi`` column equals it are
    used; otherwise the whole frame is fitted.
    """
    subset = df
    if dpi is not None:
        subset = df[df.dpi == dpi]

    full_formula = f"{target} {formula}"
    fitted = ols(full_formula, data=subset).fit()
    return fitted
|
|
|
|
|
def anova_table(aov, add_columns: bool = True):
    """Extend an ANOVA table with mean squares and effect sizes.

    The function was created specifically for the one-way ANOVA table
    results returned for Type II sum of squares.

    Args:
        aov: ANOVA table with ``sum_sq``, ``df``, ``F`` and ``PR(>F)``
            columns. The last row is used as the error term in the
            omega-squared formula.
        add_columns: When truthy, ``mean_sq``, ``eta_sq`` and ``omega_sq``
            are computed and added to ``aov`` in place. When falsy those
            columns must already be present.

    Returns:
        A frame with the columns ordered as ``sum_sq``, ``df``, ``mean_sq``,
        ``F``, ``PR(>F)``, ``eta_sq``, ``omega_sq``. The effect-size columns
        are NaN on the last row.
    """
    if add_columns:
        aov["mean_sq"] = aov["sum_sq"] / aov["df"]

        total_sum_sq = sum(aov["sum_sq"])
        # The table is indexed by term labels, so the last row must be taken
        # positionally; plain ``[-1]`` is a label lookup on recent pandas.
        error_mean_sq = aov["mean_sq"].iloc[-1]

        # Every row except the last one; assignment below aligns on the
        # index and leaves the last row as NaN.
        effect_sum_sq = aov["sum_sq"].iloc[:-1]
        effect_df = aov["df"].iloc[:-1]

        aov["eta_sq"] = effect_sum_sq / total_sum_sq
        aov["omega_sq"] = (effect_sum_sq - effect_df * error_mean_sq) / (
            total_sum_sq + error_mean_sq
        )

    cols = ["sum_sq", "df", "mean_sq", "F", "PR(>F)", "eta_sq", "omega_sq"]
    return aov[cols]
|
|
|
|
|
def plot_assumptions(models: list, titles: list, figsize=(12, 4)):
    """Build a figure with one probability plot of the residuals per model.

    Args:
        models: Fitted models exposing a ``resid`` attribute.
        titles: Subplot titles, paired with ``models`` by position.
        figsize: Size passed to the matplotlib ``Figure``.

    Returns:
        The ``Figure`` holding a single row of probability plots.
    """
    fig = Figure(figsize=figsize)
    fig.suptitle("Probability plot of model residual's", fontsize="x-large")
    # squeeze=False always returns a 2-D array of axes, so the single-model
    # case is iterable too (a bare Axes would break the zip below).
    axii = fig.subplots(1, len(models), squeeze=False)[0]
    for ax, model, title in zip(axii, models, titles):
        stats.probplot(model.resid, plot=ax, rvalue=True)
        ax.set_title(title)

    return fig
|
|
|
|
|
def hghlight_rejection(s):
    """Return a frame of CSS background styles shaped like ``s``.

    ``group1``/``group2`` cells are green where ``reject_pred`` equals
    ``reject_obs`` and red where they differ. Each ``reject_*`` cell is green
    when its own flag is True and red otherwise. All other cells stay NaN.
    """
    green = "background: green"
    red = "background: red"
    styles = pd.DataFrame(columns=s.columns, index=s.index)

    # Colour the group pair by agreement between prediction and observation.
    pair_columns = ["group1", "group2"]
    disagree = s["reject_pred"].ne(s["reject_obs"])
    agree = s["reject_pred"].eq(s["reject_obs"])
    styles.loc[disagree, pair_columns] = red
    styles.loc[agree, pair_columns] = green

    # Colour each rejection flag by its own value.
    for flag_column in ("reject_pred", "reject_obs"):
        rejected = s[flag_column]
        styles.loc[rejected, [flag_column]] = green
        styles.loc[~rejected, [flag_column]] = red

    return styles
|
|
|
|
|
def get_tuckey_df(endog, groups, df_genotypes) -> pd.DataFrame:
    """Run a pairwise Tukey HSD test and return its result table as a frame.

    The first row of the statsmodels result table supplies the column names.
    ``group1``/``group2`` are converted to strings, ``reject`` to booleans and
    ``p-adj`` is overwritten with the test's ``pvalues``.

    When ``df_genotypes`` is given (columns ``gen`` and ``rpvloci``), it is
    left-joined once per group column, adding ``group1_rpvloci`` and
    ``group2_rpvloci``.
    """
    hsd = pairwise_tukeyhsd(endog=endog, groups=groups)

    raw = pd.DataFrame(hsd._results_table)
    raw.columns = [str(cell) for cell in raw.iloc[0]]
    header_row = raw.index[0]

    table = raw.drop(header_row).assign(
        group1=lambda s: s.group1.astype(str),
        group2=lambda s: s.group2.astype(str),
        reject=lambda s: s.reject.astype(str) == "True",
    )
    table["p-adj"] = hsd.pvalues

    if df_genotypes is None:
        return table

    # Attach the rpvloci value of each group, first for group1, then group2.
    for group_column, loci_column in (
        ("group1", "group1_rpvloci"),
        ("group2", "group2_rpvloci"),
    ):
        table = (
            table.merge(
                right=df_genotypes, how="left", left_on=group_column, right_on="gen"
            )
            .drop(["gen"], axis=1)
            .rename(columns={"rpvloci": loci_column})
        )
    return table
|
|
|
|
|
def get_tuckey_compare(df, df_genotypes=None, groups: str = "gen"):
    """Merge the Tukey HSD tables of ``p_oiv`` and ``oiv`` side by side.

    Both tables are computed with ``get_tuckey_df`` over the ``groups`` column
    and joined on the group pair (plus the rpvloci columns when
    ``df_genotypes`` is given). Columns from the ``p_oiv`` table get the
    ``_pred`` suffix, those from the ``oiv`` table the ``_obs`` suffix.
    """
    if df_genotypes is None:
        merge_on = ["group1", "group2"]
    else:
        merge_on = ["group1", "group2", "group1_rpvloci", "group2_rpvloci"]

    tukey_pred = get_tuckey_df(df.p_oiv, df[groups], df_genotypes=df_genotypes)
    tukey_obs = get_tuckey_df(df.oiv, df[groups], df_genotypes=df_genotypes)

    return pd.merge(
        left=tukey_pred,
        right=tukey_obs,
        on=merge_on,
        suffixes=["_pred", "_obs"],
    )
|
|
|
|
|
def df_tukey_cmp_plot(df, groups):
    """Plot -log(p-adj) of predicted vs. observed Tukey tests per group pair.

    The comparison table from ``get_tuckey_compare`` is sorted by
    ``p-adj_obs`` and split into three panels sharing the y axis:
    "Rejected" (both tests reject), "Conflict" (the tests disagree) and
    "Accepted" (neither rejects). Panel widths are proportional to the number
    of pairs in each. Green bars are predictions, blue bars are scorings.

    Returns:
        The matplotlib ``Figure``.
    """
    comparison = get_tuckey_compare(df=df, groups=groups, df_genotypes=None)
    comparison = comparison.assign(
        pair_groups=lambda s: s.group1 + "\n" + s.group2
    )
    comparison = comparison.sort_values("p-adj_obs")

    obs_rejects = comparison.reject_obs
    pred_rejects = comparison.reject_pred
    both_reject = comparison[obs_rejects & pred_rejects]
    both_accept = comparison[~obs_rejects & ~pred_rejects]
    conflicting = comparison[obs_rejects != pred_rejects]

    # NOTE(review): a panel with no pairs gets a width ratio of 0; confirm
    # matplotlib lays that out as intended.
    panel_widths = [len(both_reject), len(conflicting), len(both_accept)]

    fig = Figure(figsize=(20, 6))
    ax_reject, ax_diverge, ax_accept = fig.subplots(
        1,
        3,
        gridspec_kw={"width_ratios": panel_widths},
        sharey=True,
    )

    # Same processing order as before: rejected, accepted, then conflicting.
    panels = [
        (ax_reject, both_reject),
        (ax_accept, both_accept),
        (ax_diverge, conflicting),
    ]

    star_labels = ["*", "**", "***", "****"]
    for ax, _ in panels:
        ax.set_yticks(ticks=stars, labels=star_labels)
        ax.grid(False)

    ax_reject.set_title("Rejected")
    ax_diverge.set_title("Conflict")
    ax_accept.set_title("Accepted")

    for ax, subset in panels:
        # Horizontal guide line at every significance threshold.
        for threshold in stars:
            ax.axhline(y=threshold, linestyle="-", color="black", alpha=0.5)

        pair_labels = subset["pair_groups"]
        # Negative width with align="edge" puts the bar left of the tick.
        ax.bar(
            x=pair_labels,
            height=-np.log(subset["p-adj_pred"]),
            width=-0.4,
            align="edge",
            color="green",
            label="predictions",
        )
        # Positive width puts the scoring bar right of the tick.
        ax.bar(
            x=pair_labels,
            height=-np.log(subset["p-adj_obs"]),
            width=0.4,
            align="edge",
            color="blue",
            label="scorings",
        )
        ax.margins(0.01)

    ax_accept.legend(loc="upper left", bbox_to_anchor=[0, 1], ncols=1, fancybox=True)
    ax_reject.set_ylabel("-log(p value)")
    ax_reject.tick_params(axis="y", which="major", labelsize=16)

    fig.subplots_adjust(wspace=0.05, hspace=0.05)

    return fig
|
|
|
|
|
def plot_patches(df, diff_only: bool = True):
    """Lay out the images listed in ``df`` as a single-row panel grid.

    Each row becomes a column with a Markdown header
    (``file_name|oiv->p<p_oiv>``) above the image loaded from
    ``cc.path_to_leaf_patches`` and brightened by a factor of 1.5.

    Args:
        df: Frame with ``file_name``, ``oiv`` and ``p_oiv`` columns.
        diff_only: When True, only rows where ``oiv`` differs from ``p_oiv``
            are shown.

    Returns:
        A ``pn.GridBox`` with as many grid columns as rows shown.
    """
    shown = df[(df.oiv != df.p_oiv)] if diff_only is True else df
    shown = shown.assign(diff=lambda s: s.oiv != s.p_oiv)
    shown = shown.sort_values(["diff", "oiv", "p_oiv"])

    def _patch_column(row):
        # Header first, then the image, matching the original build order.
        header = pn.pane.Markdown(f"### {row.file_name}|{row.oiv}->p{row.p_oiv}")
        image = ci.load_image(
            file_name=row.file_name,
            path_to_images=cc.path_to_leaf_patches,
        )
        enhanced = ci.enhance_pil_image(image=image, brightness=1.5)
        return pn.Column(header, pn.pane.Image(object=enhanced))

    columns = [_patch_column(row) for _, row in shown.iterrows()]
    return pn.GridBox(*columns, ncols=len(shown))
|
|