{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import os\n",
    "\n",
    "import re"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Root directory under which the per-model result folders are written.\n",
    "res_path = '../results'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Leaderboard export. The conversion cell below reads its Model and task columns\n",
    "# plus one score column per context length.\n",
    "# NOTE(review): absolute, machine-specific path - adjust when running elsewhere.\n",
    "p = \"/home/jovyan/rmt/babilong-leaderboard/data/BABILong NeurIPS24 Figs - leaderboard.csv\"\n",
    "res_df = pd.read_csv(p)\n",
    "# res_df = res_df[res_df.task.isin(['qa1', 'qa2', 'qa3', 'qa4', 'qa5'])]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Write every non-missing leaderboard score to\n",
    "#   <res_path>/<model>/<task>/<length>.csv\n",
    "# as a one-row CSV with a single 'result' column.\n",
    "\n",
    "# lens[i] is the number used in the output file name for the score stored in\n",
    "# leaderboard column len_names[i].\n",
    "lens = [0, 1000, 2000, 4000, 8000, 16000, 32000, 64000, 128000, 500000, 1000000, 10000000]\n",
    "len_names = ['0K', '1K', '2K', '4K', '8K', '16K', '32K', '64K', '128K', '512K', '1M', '10M']\n",
    "\n",
    "# sort=False keeps models in order of first appearance; groupby also skips rows\n",
    "# whose Model is missing instead of failing on them.\n",
    "for model_name, model_df in res_df.groupby('Model', sort=False):\n",
    "    # A '/' in the model name would be interpreted as a nested directory.\n",
    "    model_dir = os.path.join(res_path, str(model_name).replace('/', ' '))\n",
    "    for _, row in model_df.iterrows():\n",
    "        for length, column in zip(lens, len_names):\n",
    "            raw_score = row[column]\n",
    "            if pd.isna(raw_score):\n",
    "                continue\n",
    "            # Text scores use a decimal comma. Going through str() also covers\n",
    "            # columns that pandas already parsed as numbers, where a regex or\n",
    "            # string method applied directly would raise TypeError.\n",
    "            score = float(str(raw_score).replace(',', '.')) / 100\n",
    "            task_dir = os.path.join(model_dir, row.task)\n",
    "            # makedirs creates the model directory as well when it is missing.\n",
    "            os.makedirs(task_dir, exist_ok=True)\n",
    "            path = os.path.join(task_dir, f'{length}.csv')\n",
    "            pd.DataFrame([{'result': score}]).to_csv(path, index=False)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Calculate average results"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Names of the immediate sub-directories of res_path, which include the\n",
    "# per-model folders written above.\n",
    "model_names = next(os.walk(res_path))[1]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "
\n", " | 1 | \n", "
---|---|
0 | \n", "2 | \n", "