Spaces:

wissamantoun
/

LLM_Detection_Attribution

Running

App Files Files Community

LLM_Detection_Attribution / app.py

wissamantoun

Update app.py

3b165bd verified 9 months ago

raw

history blame contribute delete

22.9 kB

	import json
	from typing import Tuple

	import numpy as np
	import pandas as pd
	import plotly.express as px
	import plotly.figure_factory as ff
	import plotly.graph_objects as go
	import streamlit as st
	from plotly.subplots import make_subplots

	from exp_utils import MODELS
	from visualize_utils import viridis_rgb

	# Page-wide Streamlit settings.
	st.set_page_config(
	    layout="wide",
	    initial_sidebar_state="expanded",
	    page_title="Results Viewer",
	    page_icon="📊",
	)


	def _base_model_name(name):
	    """Return `name` without its "_quantized" / "_watermarked" suffix (its key in MODELS)."""
	    return name.replace("_quantized", "").replace("_watermarked", "")


	def _family_then_size(name):
	    """Sort key: (model family, model size) of the underlying base model."""
	    spec = MODELS[_base_model_name(name)]
	    return spec["model_family"], spec["model_size"]


	def _size_then_family(name):
	    """Sort key: (model size, model family) of the underlying base model."""
	    spec = MODELS[_base_model_name(name)]
	    return spec["model_size"], spec["model_family"]


	# model name -> family, and the set of distinct families
	MODELS_FAMILY_MAPPING = {name: spec["model_family"] for name, spec in MODELS.items()}
	MODEL_FAMILES = set(MODELS_FAMILY_MAPPING.values())

	# model name -> size, with entries ordered by size and then by name
	MODELS_SIZE_MAPPING = dict(
	    sorted(
	        ((name, spec["model_size"]) for name, spec in MODELS.items()),
	        key=lambda pair: (pair[1], pair[0]),
	    )
	)
	MODELS_SIZE_MAPPING_LIST = list(MODELS_SIZE_MAPPING)

	# Base models that also exist as quantized and watermarked variants.
	_VARIANT_BASE_MODELS = [
	    "llama-7b",
	    "llama-2-7b",
	    "llama-13b",
	    "llama-2-13b",
	    "llama-30b",
	    "llama-65b",
	    "llama-2-70b",
	]
	# All quantized names first, then all watermarked names.
	Q_W_MODELS = [
	    f"{base}_{variant}"
	    for variant in ("quantized", "watermarked")
	    for base in _VARIANT_BASE_MODELS
	]

	MODEL_NAMES = [*MODELS.keys(), *Q_W_MODELS]

	MODEL_NAMES_SORTED_BY_NAME_AND_SIZE = sorted(MODEL_NAMES, key=_family_then_size)
	MODEL_NAMES_SORTED_BY_SIZE = sorted(MODEL_NAMES, key=_size_then_family)

	# Names (variants included) whose base model is flagged as a chat model.
	CHAT_MODELS = [
	    name
	    for name in MODEL_NAMES_SORTED_BY_NAME_AND_SIZE
	    if MODELS[_base_model_name(name)]["is_chat"]
	]


	def clean_dataframe(df: pd.DataFrame) -> Tuple[pd.DataFrame, pd.DataFrame]:
	    """Aggregate raw per-run results into per-model mean and std tables.

	    Bookkeeping columns (epoch, loss, runtime, throughput, sample counts,
	    results_dir) are dropped, the "_roc_auc" and "eval_" fragments are removed
	    from the remaining column names, and the rows are grouped by "model_name".

	    Args:
	        df: Raw results with a "model_name" column and an "exp_seed" column.

	    Returns:
	        A ``(df_avg, df_std)`` tuple indexed by model name. Both tables have
	        the same layout: the aggregated columns plus "model_family",
	        "model_size", "is_quantized" and "is_watermarked". Only rows whose
	        model name is also a column name are kept, in column order.
	    """
	    words_to_remove = [
	        "epoch",
	        "loss",
	        "runtime",
	        "samples_per_second",
	        "steps_per_second",
	        "samples",
	        "results_dir",
	    ]
	    # The separator must be the bare regex alternation operator. The previous
	    # "\|" separator is an escaped, literal pipe, so the pattern matched no
	    # column name and nothing was removed.
	    drop_pattern = "|".join(words_to_remove)
	    keep_mask = ~df.columns.str.contains(drop_pattern, case=False, regex=True)
	    # Copy the selection so the assignments below act on an independent frame
	    # instead of a slice of the caller's data.
	    df = df.loc[:, keep_mask].copy()

	    df.columns = df.columns.str.replace("_roc_auc", "", regex=False)
	    df.columns = df.columns.str.replace("eval_", "", regex=False)

	    def base_name(name):
	        # quantized / watermarked variants share the metadata of their base model
	        return name.replace("_quantized", "").replace("_watermarked", "")

	    df["model_family"] = df["model_name"].apply(
	        lambda name: MODELS_FAMILY_MAPPING[base_name(name)]
	    )
	    model_family_dict = dict(
	        zip(df["model_name"].tolist(), df["model_family"].tolist())
	    )

	    # Aggregate numeric / boolean columns only. Text columns such as
	    # "model_family" cannot be averaged: pandas >= 2.0 raises a TypeError for
	    # them, while older versions dropped them silently.
	    value_columns = [
	        column
	        for column in df.select_dtypes(include=["number", "bool"]).columns
	        if column != "model_name"
	    ]
	    per_model = df.groupby("model_name")[value_columns]

	    def finalize(table: pd.DataFrame) -> pd.DataFrame:
	        # the seed is only the repetition id, not a result
	        table = table.drop(columns=["exp_seed"])
	        table["model_family"] = table.index.map(model_family_dict)
	        table["model_size"] = table.index.map(
	            lambda name: MODELS_SIZE_MAPPING[base_name(name)]
	        )
	        table = table.sort_values(
	            by=["model_family", "model_size"], ascending=[True, True]
	        )
	        # keep only rows that also appear as a column, in column order
	        available_rows = [col for col in table.columns if col in table.index]
	        table = table.reindex(available_rows)
	        table["is_quantized"] = table.index.str.contains("quantized")
	        table["is_watermarked"] = table.index.str.contains("watermarked")
	        return table

	    return finalize(per_model.mean()), finalize(per_model.std())


	def get_data(path) -> Tuple[pd.DataFrame, pd.DataFrame]:
	    """Read the results CSV at `path` and return its (mean, std) per-model tables."""
	    raw_results = pd.read_csv(path, index_col=0)
	    mean_table, std_table = clean_dataframe(raw_results)
	    return mean_table, std_table


	def _variant_base(name):
	    """Return `name` without its "_quantized" / "_watermarked" suffix (its key in MODELS)."""
	    return name.replace("_quantized", "").replace("_watermarked", "")


	def _show_step(enabled, label, frame):
	    """Write `label` followed by `frame` to the page when debug output is enabled."""
	    if enabled:
	        st.write(label)
	        st.write(frame)


	def _keep_columns(frame, predicate):
	    """Return `frame` restricted to the columns accepted by `predicate`, sorted by name."""
	    return frame[sorted({column for column in frame.columns if predicate(column)})]


	def _push_back(frame, candidates):
	    """Move the `candidates` present in `frame` behind all other columns and rows.

	    The moved names are ordered by the size of their base model.
	    """

	    def size_of(name):
	        return MODELS[_variant_base(name)]["model_size"]

	    tail_columns = sorted(
	        (name for name in candidates if name in frame.columns), key=size_of
	    )
	    moved = set(tail_columns)
	    frame = frame[
	        [column for column in frame.columns if column not in moved] + tail_columns
	    ]

	    tail_rows = sorted(
	        (name for name in candidates if name in frame.index), key=size_of
	    )
	    moved = set(tail_rows)
	    return frame.reindex(
	        [row for row in frame.index if row not in moved] + tail_rows
	    )


	def filter_df(
	    df: pd.DataFrame,
	    model_family_train: list,
	    model_family_test: list,
	    model_size_train: tuple,
	    model_size_test: tuple,
	    is_chat_train: bool,
	    is_chat_test: bool,
	    is_quantized_train: bool,
	    is_quantized_test: bool,
	    is_watermarked_train: bool,
	    is_watermarked_test: bool,
	    sort_by_size: bool,
	    split_chat_models: bool,
	    split_quantized_models: bool,
	    split_watermarked_models: bool,
	    filter_empty_col_row: bool,
	    is_debug: bool,
	) -> pd.DataFrame:
	    """Select and order the train (row) / test (column) cells of a results table.

	    Rows are filtered with the metadata columns of `df` ("model_size",
	    "model_family", "is_chat", "is_quantized", "is_watermarked"); columns are
	    filtered with the MODELS entry of the model each column is named after.
	    Columns that are not named after a model are dropped.

	    Args:
	        df: Results table indexed by train model, one column per test model
	            plus the metadata columns listed above.
	        model_family_train: Families kept among the rows.
	        model_family_test: Families kept among the columns.
	        model_size_train: ``(min, max)`` row model size, in billions.
	        model_size_test: ``(min, max)`` column model size, in billions.
	        is_chat_train, is_quantized_train, is_watermarked_train: Value the
	            matching row flag must equal, or the string "Both" to skip the
	            filter.
	        is_chat_test, is_quantized_test, is_watermarked_test: Same for the
	            columns.
	        sort_by_size: Order by (size, family) instead of (family, size).
	        split_chat_models: Move chat models behind the other rows/columns.
	        split_quantized_models: Same for quantized models.
	        split_watermarked_models: Same for watermarked models.
	        filter_empty_col_row: Drop rows, then columns, that are entirely NaN.
	        is_debug: Write the intermediate table to the page after each step.

	    Returns:
	        The filtered table containing numeric model columns only.
	    """
	    # ---- filter rows (train models) ----
	    _show_step(is_debug, "No filters", df)

	    min_train, max_train = model_size_train[0] * 1e9, model_size_train[1] * 1e9
	    df = df.loc[(df["model_size"] >= min_train) & (df["model_size"] <= max_train)]
	    _show_step(is_debug, "Filter model size train", df)

	    df = df.loc[df["model_family"].isin(model_family_train)]
	    _show_step(is_debug, "Filter model family train", df)

	    if is_chat_train != "Both":
	        df = df.loc[df["is_chat"] == is_chat_train]
	    _show_step(is_debug, "Filter is chat train", df)

	    if is_quantized_train != "Both":
	        df = df.loc[df["is_quantized"] == is_quantized_train]
	    _show_step(is_debug, "Filter is quantized train", df)

	    if is_watermarked_train != "Both":
	        df = df.loc[df["is_watermarked"] == is_watermarked_train]
	    _show_step(is_debug, "Filter is watermark train", df)

	    # ---- filter columns (test models) ----
	    _show_step(is_debug, "No filters", df)

	    min_test, max_test = model_size_test[0] * 1e9, model_size_test[1] * 1e9

	    def in_test_size_range(column):
	        # This first column filter also removes every non-model column, so the
	        # later filters can index MODELS without checking membership.
	        base = _variant_base(column)
	        return base in MODELS and min_test <= MODELS[base]["model_size"] <= max_test

	    df = _keep_columns(df, in_test_size_range)
	    _show_step(is_debug, "Filter model size test", df)

	    test_families = set(model_family_test)
	    df = _keep_columns(
	        df,
	        lambda column: MODELS[_variant_base(column)]["model_family"] in test_families,
	    )
	    _show_step(is_debug, "Filter model family test", df)

	    if is_chat_test != "Both":
	        df = _keep_columns(
	            df,
	            lambda column: MODELS[_variant_base(column)]["is_chat"] == is_chat_test,
	        )
	    _show_step(is_debug, "Filter is chat test", df)

	    if is_quantized_test != "Both":
	        want_quantized = bool(is_quantized_test)
	        df = _keep_columns(
	            df, lambda column: ("quantized" in column) == want_quantized
	        )
	    _show_step(is_debug, "Filter is quantized test", df)

	    if is_watermarked_test != "Both":
	        want_watermarked = bool(is_watermarked_test)
	        df = _keep_columns(
	            df, lambda column: ("watermark" in column) == want_watermarked
	        )
	    _show_step(is_debug, "Filter is watermark test", df)

	    df = df.select_dtypes(include="number")
	    _show_step(is_debug, "Select dtypes to be only numbers", df)

	    # ---- order columns and rows ----
	    ordering = (
	        MODEL_NAMES_SORTED_BY_SIZE
	        if sort_by_size
	        else MODEL_NAMES_SORTED_BY_NAME_AND_SIZE
	    )
	    df = df[[name for name in ordering if name in df.columns]]
	    _show_step(is_debug, "Sort columns", df)

	    df = df.reindex([name for name in ordering if name in df.index])
	    _show_step(is_debug, "Sort rows", df)

	    # ---- optionally group special model kinds at the end ----
	    if split_chat_models:
	        df = _push_back(df, CHAT_MODELS)
	    _show_step(is_debug, "Split chat models", df)

	    if split_quantized_models:
	        df = _push_back(df, [name for name in Q_W_MODELS if "quantized" in name])
	    if split_watermarked_models:
	        df = _push_back(df, [name for name in Q_W_MODELS if "watermarked" in name])
	    _show_step(is_debug, "Split quantized and watermarked models", df)

	    if filter_empty_col_row:
	        # drop rows, then columns, that contain no value at all
	        df = df.dropna(axis=0, how="all")
	        df = df.dropna(axis=1, how="all")
	    return df


	# Per-model mean / std tables for the main runs and for the quantized /
	# watermarked runs.
	df, df_std = get_data("./deberta_results.csv")
	df_q_w, df_std_q_w = get_data("./results_qantized_watermarked.csv")


	def _variant_columns(table):
	    """Return the columns of `table` whose name mentions quantized or watermarked."""
	    # Bare "|" is regex alternation. The previous "\|" only matched a literal
	    # pipe, which selected no column and made the "_x" drop below fail.
	    mask = table.columns.str.contains(
	        "quantized|watermarked", case=False, regex=True
	    )
	    return table[table.columns[mask]]


	def _merge_variants(base, variants):
	    """Outer-join the variant columns onto `base` and resolve the duplicated flags."""
	    merged = base.merge(
	        _variant_columns(variants),
	        how="outer",
	        left_index=True,
	        right_index=True,
	    )
	    # Columns present on both sides get "_x" (base) / "_y" (variants) suffixes.
	    # Keep the variant side under the plain name; anchor the pattern so that
	    # only a trailing "_y" is stripped.
	    merged.columns = merged.columns.str.replace(r"_y$", "", regex=True)
	    return merged.drop(columns=["is_quantized_x", "is_watermarked_x"])


	df = _merge_variants(df, df_q_w)
	df_std = _merge_variants(df_std, df_std_q_w)

	# Overwrite cells with the non-NA values of the variant tables.
	df.update(df_q_w)
	df_std.update(df_std_q_w)

	# Rows introduced by the outer join have no flags yet. Assign the filled
	# column back: calling fillna(inplace=True) on a selected column does not
	# reliably modify the parent frame (it is a no-op under Copy-on-Write).
	for _table in (df, df_std):
	    for _flag in ("is_chat", "is_watermarked", "is_quantized"):
	        _table[_flag] = _table[_flag].fillna(False)

	with open("./ood_results.json", "r", encoding="utf-8") as f:
	    ood_results = json.load(f)

	ood_results = pd.DataFrame(ood_results)
	ood_results = ood_results.set_index("model_name")
	ood_results = ood_results.drop(
	    columns=["exp_name", "accuracy", "f1", "precision", "recall"]
	)
	# Exactly two columns are expected to remain at this point.
	ood_results.columns = ["seed", "Adversarial"]

	# Both aggregates keep the "seed" column next to "Adversarial".
	ood_results_avg = ood_results.groupby(["model_name"]).mean()
	ood_results_std = ood_results.groupby(["model_name"]).std()

	# Page header.
	st.write(
	"""### Results Viewer 👇

	## From Text to Source: Results in Detecting Large Language Model-Generated Content

	### Wissam Antoun, Benoît Sagot, Djamé Seddah
	##### ALMAnaCH, Inria

	##### Paper: [https://arxiv.org/abs/2309.13322](https://arxiv.org/abs/2309.13322)
	"""
	)

	# filters
	_sidebar = st.sidebar

	show_diff = _sidebar.checkbox("Show Diff", value=False)
	sort_by_size = _sidebar.checkbox("Sort by size", value=True)
	split_chat_models = _sidebar.checkbox("Split chat models", value=True)
	split_quantized_models = _sidebar.checkbox("Split quantized models", value=True)
	split_watermarked_models = _sidebar.checkbox("Split watermarked models", value=True)
	add_mean = _sidebar.checkbox("Add mean", value=False)
	show_std = _sidebar.checkbox("Show std", value=False)
	filter_empty_col_row = _sidebar.checkbox("Filter empty col/row", value=True)

	# Size ranges are expressed in billions of parameters.
	_size_slider = dict(min_value=0, max_value=100, value=(0, 100), step=1)
	model_size_train = _sidebar.slider("Train Model Size in Billion", **_size_slider)
	model_size_test = _sidebar.slider("Test Model Size in Billion", **_size_slider)

	# Tri-state filters: require the flag, forbid it, or ignore it ("Both").
	_tri_state = [True, False, "Both"]
	is_chat_train = _sidebar.selectbox("(Train) Is Chat?", _tri_state, index=2)
	is_chat_test = _sidebar.selectbox("(Test) Is Chat?", _tri_state, index=2)
	is_quantized_train = _sidebar.selectbox("(Train) Is Quantized?", _tri_state, index=1)
	is_quantized_test = _sidebar.selectbox("(Test) Is Quantized?", _tri_state, index=1)
	is_watermarked_train = _sidebar.selectbox("(Train) Is Watermark?", _tri_state, index=1)
	is_watermarked_test = _sidebar.selectbox("(Test) Is Watermark?", _tri_state, index=1)

	# MODEL_FAMILES is a set, whose iteration order can change from one launch
	# to the next; sort it so the options always appear in the same order.
	_family_options = sorted(MODEL_FAMILES)
	model_family_train = _sidebar.multiselect(
	    "Model Family Train",
	    _family_options,
	    default=_family_options,
	)
	model_family_test = _sidebar.multiselect(
	    "Model Family Test",
	    _family_options + ["Adversarial"],
	    default=_family_options,
	)

	show_values = _sidebar.checkbox("Show Values", value=False)

	# "Adversarial" is not a model family: turn it into its own switch and keep
	# only real families in the list (built anew rather than mutating the list
	# returned by the widget).
	add_adversarial = "Adversarial" in model_family_test
	model_family_test = [
	    family for family in model_family_test if family != "Adversarial"
	]

	# The sort option only exists while the adversarial column is shown.
	if add_adversarial:
	    sort_by_adversarial = _sidebar.checkbox("Sort by adversarial", value=False)
	else:
	    sort_by_adversarial = False

	if _sidebar.checkbox("Use default color scale", value=False):
	    color_scale = "Viridis_r"
	else:
	    color_scale = viridis_rgb

	is_debug = _sidebar.checkbox("Debug", value=False)

	# Work on a copy so the module-level tables are never modified.
	selected_df = (df_std if show_std else df).copy()

	filtered_df = filter_df(
	    selected_df,
	    model_family_train=model_family_train,
	    model_family_test=model_family_test,
	    model_size_train=model_size_train,
	    model_size_test=model_size_test,
	    is_chat_train=is_chat_train,
	    is_chat_test=is_chat_test,
	    is_quantized_train=is_quantized_train,
	    is_quantized_test=is_quantized_test,
	    is_watermarked_train=is_watermarked_train,
	    is_watermarked_test=is_watermarked_test,
	    sort_by_size=sort_by_size,
	    split_chat_models=split_chat_models,
	    split_quantized_models=split_quantized_models,
	    split_watermarked_models=split_watermarked_models,
	    filter_empty_col_row=filter_empty_col_row,
	    is_debug=is_debug,
	)

	if show_diff:
	    # Subtract the k-th diagonal value from the k-th column.
	    # NOTE(review): this needs one diagonal entry per column, so it presumably
	    # expects the same models on both axes - confirm.
	    diag = filtered_df.values.diagonal()
	    filtered_df = filtered_df.sub(diag, axis=1)

	if add_adversarial:
	    # Join the score only: ood_results_avg also carries the averaged "seed"
	    # column, which is not a result and must not end up in the heatmap.
	    adversarial_scores = ood_results_avg[["Adversarial"]]
	    if show_diff:
	        # reindex (rather than .loc) leaves NaN for models without an
	        # out-of-distribution result instead of raising a KeyError
	        adversarial_scores = adversarial_scores.reindex(filtered_df.index).sub(
	            diag, axis=0
	        )
	    filtered_df = filtered_df.join(adversarial_scores)

	if add_mean:
	    # col_mean: one value per row (train model); row_mean: one per column.
	    col_mean = filtered_df.mean(axis=1)
	    row_mean = filtered_df.mean(axis=0)
	    diag = filtered_df.values.diagonal()
	    filtered_df["mean"] = col_mean
	    filtered_df.loc["mean"] = row_mean

	# Display the values as rounded percentages.
	filtered_df = (filtered_df * 100).round(0)

	# sort by the column called Adversarial
	if sort_by_adversarial:
	    filtered_df = filtered_df.sort_values(by=["Adversarial"], ascending=False)

	# Nothing to draw when either axis is empty.
	if filtered_df.empty:
	    st.write("No results found")
	    st.stop()

	# Heatmap of the filtered matrix: test models on x, train models on y.
	fig = px.imshow(
	    filtered_df.values,
	    x=filtered_df.columns.tolist(),
	    y=filtered_df.index.tolist(),
	    aspect="auto",
	    text_auto=show_values,
	    contrast_rescaling=None,
	    color_continuous_scale=color_scale,
	)

	# Disabled export controls, kept for reference:
	# width = st.sidebar.text_input("Width", "1920")
	# height = st.sidebar.text_input("Height", "1080")
	# scale = st.sidebar.text_input("Scale", "1.0")
	# margin = st.sidebar.text_input("Margin[l,r,b,t]", "200,100,100,100")

	# Font size of the values written inside the cells.
	fig.update_traces(textfont_size=9)
	fig.update_layout(
	    font={"size": 10},
	    xaxis={"side": "top"},
	    yaxis={"side": "left"},
	    # margin=dict(
	    # l=int(margin.split(",")[0]),
	    # r=int(margin.split(",")[1]),
	    # b=int(margin.split(",")[2]),
	    # t=int(margin.split(",")[3]),
	    # ),
	)
	# Linear tick mode on both axes; x labels are slanted.
	fig.update_xaxes(tickangle=45, tickmode="linear")
	fig.update_yaxes(tickmode="linear")
	st.plotly_chart(fig, use_container_width=True)

	# Disabled export button, kept for reference:
	# if st.sidebar.button("save", key="save"):
	# fig.write_image(
	# "fig1.pdf",
	# width=int(width),
	# height=int(height),
	# validate=True,
	# scale=float(scale),
	# )


	# Scatter plots of the mean / self-detection scores against model size.
	# They rely on col_mean, row_mean and diag, which only exist when "Add mean"
	# is enabled.
	if add_mean and not show_diff:

	    def _plot_model_key(name):
	        """Return the MODELS key for `name` (variant suffix removed)."""
	        return name.replace("_quantized", "").replace("_watermarked", "")

	    def _size_scatter(models, scores, title="", labels=None):
	        """Scatter `scores` against the size of `models`, coloured by family."""
	        specs = [MODELS[_plot_model_key(name)] for name in models]
	        return px.scatter(
	            x=[spec["model_size"] for spec in specs],
	            y=scores,
	            color=[spec["model_family"] for spec in specs],
	            color_discrete_sequence=px.colors.qualitative.Plotly,
	            title=title,
	            labels=labels,
	            log_x=True,
	            trendline="ols",
	        )

	    # check if any of the chat models are in the filtered df columns and index
	    if any(
	        name in filtered_df.columns or name in filtered_df.index
	        for name in CHAT_MODELS
	    ):
	        st.warning(
	            "Chat models are in the filtered df columns or index. "
	            "This will cause the mean graph to be skewed."
	        )

	    # Use the labels the means were computed with: col_mean is indexed by
	    # row (train model) and row_mean by column (test model), both in the
	    # order that diag follows, even if the rows were re-sorted afterwards.
	    # Labels that are not models ("Adversarial", "seed") have no size or
	    # family and are left out.
	    train_models = [
	        name for name in col_mean.index if _plot_model_key(name) in MODELS
	    ]
	    test_models = [
	        name for name in row_mean.index if _plot_model_key(name) in MODELS
	    ]

	    # A diagonal cell is a self-detection score only when its row and column
	    # are the same model.
	    self_scores = {
	        column: score
	        for row, column, score in zip(col_mean.index, row_mean.index, diag)
	        if row == column
	    }
	    self_models = [name for name in test_models if name in self_scores]

	    panels = [
	        (
	            self_models,
	            [self_scores[name] for name in self_models],
	            "Self ROC AUC",
	        ),
	        (
	            test_models,
	            row_mean.loc[test_models].to_numpy(),
	            "Average ROC AUC",
	        ),
	    ]

	    # put the two plots side by side
	    fig_subplot = make_subplots(
	        rows=1,
	        cols=2,
	        shared_yaxes=False,
	        subplot_titles=("Self Detection ROC AUC", "Average Target ROC AUC"),
	    )
	    for position, (models, scores, y_label) in enumerate(panels, start=1):
	        if not models:
	            # nothing to draw in this panel for the current filters
	            continue
	        panel = _size_scatter(
	            models,
	            scores,
	            labels={
	                "x": "Target Model Size",
	                "y": y_label,
	                "color": "Model Family",
	            },
	        )
	        for trace in panel.data:
	            if position == 2:
	                # the families are already listed by the first panel
	                trace.showlegend = False
	            # add_trace replaces the deprecated append_trace
	            fig_subplot.add_trace(trace, row=1, col=position)

	    fig_subplot.update_xaxes(type="log")
	    # y axis range
	    fig_subplot.update_yaxes(range=[0.90, 1])
	    # fixed size, legend on the bottom
	    fig_subplot.update_layout(
	        height=500,
	        width=1200,
	        legend=dict(orientation="h", yanchor="bottom", y=-0.2, x=0.09),
	    )
	    st.plotly_chart(fig_subplot, use_container_width=True)

	    if train_models:
	        fig2 = _size_scatter(
	            train_models,
	            col_mean.loc[train_models].to_numpy(),
	            title="Mean vs Train Model Size",
	        )
	        fig2.update_layout(
	            height=600,
	            width=900,
	        )
	        st.plotly_chart(fig2, use_container_width=False)