{ "cells": [ { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "import os\n", "\n", "import pandas as pd" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Dataset directory: defaults to the original location; set DEEPLOC_DIR to point elsewhere.\n", "path = os.environ.get(\"DEEPLOC_DIR\", \"/home/a03-sgoel/mESMerize/benchmarks/DeepLoc\")" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " | Unnamed: 0 | \n", "ACC | \n", "Kingdom | \n", "Partition | \n", "Peripheral | \n", "Transmembrane | \n", "LipidAnchor | \n", "Soluble | \n", "Sequence | \n", "
---|---|---|---|---|---|---|---|---|---|
0 | \n", "0 | \n", "I3R9M8 | \n", "Archaea | \n", "0 | \n", "1 | \n", "0 | \n", "0 | \n", "0 | \n", "MSTDSDAETVDLADGVDHQVAMVMDLNKCIGCQTCTVACKSLWTEG... | \n", "
1 | \n", "1 | \n", "I3R9M9 | \n", "Archaea | \n", "1 | \n", "1 | \n", "0 | \n", "0 | \n", "0 | \n", "MSRNDASQLDDGETTAESPPDDQANDAPEVGDPPGDPVDADSGVSR... | \n", "
2 | \n", "2 | \n", "Q7ZAG8 | \n", "Archaea | \n", "2 | \n", "1 | \n", "0 | \n", "0 | \n", "0 | \n", "MTKVLVLGGRFGALTAAYTLKRLVGSKADVKVINKSRFSYFRPALP... | \n", "
3 | \n", "3 | \n", "Q8PZ67 | \n", "Archaea | \n", "0 | \n", "1 | \n", "0 | \n", "0 | \n", "1 | \n", "MPPKIAEVIQHDVCAACGACEAVCPIGAVTVKKAAEIRDPNDLSLY... | \n", "
4 | \n", "4 | \n", "Q9YGA6 | \n", "Archaea | \n", "0 | \n", "1 | \n", "0 | \n", "0 | \n", "0 | \n", "MAGVRLVDVWKVFGEVTAVREMSLEVKDGEFMILLGPSGCGKTTTL... | \n", "
... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
28021 | \n", "28021 | \n", "P86949 | \n", "Eukaryota | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "1 | \n", "MLRFIAIVALIATVNAKGGTYGIGVLPSVTYVSGGGGGYPGIYGTY... | \n", "
28022 | \n", "28022 | \n", "P86950 | \n", "Eukaryota | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "1 | \n", "MKPFISLASLIVLIASASAGGDDDYGKYGYGSYGPGIGGIGGGGGG... | \n", "
28023 | \n", "28023 | \n", "P86951 | \n", "Eukaryota | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "1 | \n", "MLKLVCAVVLIATVNAKGSSPGFGIGQLPGITVVSGGVSGGSLSGG... | \n", "
28024 | \n", "28024 | \n", "P86983 | \n", "Eukaryota | \n", "3 | \n", "0 | \n", "0 | \n", "0 | \n", "1 | \n", "MHQSSLGVLVLFSLIYLCISVHVPFDLNGWKALRLDNNRVQDSTNL... | \n", "
28025 | \n", "28025 | \n", "P86984 | \n", "Eukaryota | \n", "4 | \n", "0 | \n", "0 | \n", "0 | \n", "1 | \n", "MLMLLCIIATVIPFSLVEGRKGCWADPTPPGKECLYGKEIHGGRNL... | \n", "
28026 rows × 9 columns
\n", "\n", " | ACC | \n", "Kingdom | \n", "Partition | \n", "Peripheral | \n", "Transmembrane | \n", "LipidAnchor | \n", "Soluble | \n", "Sequence | \n", "
---|---|---|---|---|---|---|---|---|
0 | \n", "I3R9M8 | \n", "Archaea | \n", "0 | \n", "1 | \n", "0 | \n", "0 | \n", "0 | \n", "MSTDSDAETVDLADGVDHQVAMVMDLNKCIGCQTCTVACKSLWTEG... | \n", "
1 | \n", "I3R9M9 | \n", "Archaea | \n", "1 | \n", "1 | \n", "0 | \n", "0 | \n", "0 | \n", "MSRNDASQLDDGETTAESPPDDQANDAPEVGDPPGDPVDADSGVSR... | \n", "
2 | \n", "Q7ZAG8 | \n", "Archaea | \n", "2 | \n", "1 | \n", "0 | \n", "0 | \n", "0 | \n", "MTKVLVLGGRFGALTAAYTLKRLVGSKADVKVINKSRFSYFRPALP... | \n", "
3 | \n", "Q8PZ67 | \n", "Archaea | \n", "0 | \n", "1 | \n", "0 | \n", "0 | \n", "1 | \n", "MPPKIAEVIQHDVCAACGACEAVCPIGAVTVKKAAEIRDPNDLSLY... | \n", "
4 | \n", "Q9YGA6 | \n", "Archaea | \n", "0 | \n", "1 | \n", "0 | \n", "0 | \n", "0 | \n", "MAGVRLVDVWKVFGEVTAVREMSLEVKDGEFMILLGPSGCGKTTTL... | \n", "
... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
28021 | \n", "P86949 | \n", "Eukaryota | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "1 | \n", "MLRFIAIVALIATVNAKGGTYGIGVLPSVTYVSGGGGGYPGIYGTY... | \n", "
28022 | \n", "P86950 | \n", "Eukaryota | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "1 | \n", "MKPFISLASLIVLIASASAGGDDDYGKYGYGSYGPGIGGIGGGGGG... | \n", "
28023 | \n", "P86951 | \n", "Eukaryota | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "1 | \n", "MLKLVCAVVLIATVNAKGSSPGFGIGQLPGITVVSGGVSGGSLSGG... | \n", "
28024 | \n", "P86983 | \n", "Eukaryota | \n", "3 | \n", "0 | \n", "0 | \n", "0 | \n", "1 | \n", "MHQSSLGVLVLFSLIYLCISVHVPFDLNGWKALRLDNNRVQDSTNL... | \n", "
28025 | \n", "P86984 | \n", "Eukaryota | \n", "4 | \n", "0 | \n", "0 | \n", "0 | \n", "1 | \n", "MLMLLCIIATVIPFSLVEGRKGCWADPTPPGKECLYGKEIHGGRNL... | \n", "
28026 rows × 8 columns
\n", "