# Source: ONNX-Demo / pyserini/encoded_query_info.py (uploaded by ArthurChen189, commit 62977bb "upload pyserini", 22.4 kB)
#
# Pyserini: Reproducible IR research with sparse and dense representations
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Registry of pre-encoded query sets, keyed by query-set name.
#
# Every entry carries the same six fields:
#   "description"   - human-readable summary of the query set and its encoder
#   "urls"          - list of locations of the .tar.gz archive
#   "md5"           - hex MD5 digest recorded for the archive
#   "size (bytes)"  - recorded archive size in bytes
#   "total_queries" - recorded number of queries in the set
#   "downloaded"    - flag, False for every entry as defined here
#                     (presumably flipped by the download code elsewhere;
#                     not visible in this file)
QUERY_INFO = {
    "aggretriever-cocondenser-msmarco-passage-dev-subset": {
        "description": "MS MARCO passage dev set queries encoded by aggretriever-cocondenser",
        "urls": [
            "https://github.com/castorini/pyserini-data/raw/main/encoded-queries/query-embedding-aggretriever-cocondenser-msmarco-passage-dev-subset-20230407-f627ef.tar.gz",
        ],
        "md5": "c30ad20c7b101e3034f41597f0fc1f67",
        "size (bytes)": 20859862,
        "total_queries": 6980,
        "downloaded": False,
    },
    "aggretriever-distilbert-msmarco-passage-dev-subset": {
        "description": "MS MARCO passage dev set queries encoded by aggretriever-distilbert",
        "urls": [
            "https://github.com/castorini/pyserini-data/raw/main/encoded-queries/query-embedding-aggretriever-distilbert-msmarco-passage-dev-subset-20230407-f627ef.tar.gz",
        ],
        "md5": "a6ee094bd681b08e5657ce69185eee82",
        "size (bytes)": 20771767,
        "total_queries": 6980,
        "downloaded": False,
    },
    "tct_colbert-msmarco-passage-dev-subset": {
        "description": "MS MARCO passage dev set queries encoded by TCT-ColBERT",
        "urls": [
            "https://github.com/castorini/pyserini-data/raw/main/encoded-queries/query-embedding-tct_colbert-msmarco-passage-dev-subset-20210419-9323ec.tar.gz",
        ],
        "md5": "b2fe6494241639153f26cc61acf3b39d",
        "size (bytes)": 20078757,
        "total_queries": 6980,
        "downloaded": False,
    },
    "tct_colbert-v2-msmarco-passage-dev-subset": {
        "description": "MS MARCO passage dev set queries encoded by TCT-ColBERT V2",
        "urls": [
            "https://github.com/castorini/pyserini-data/raw/main/encoded-queries/query-embedding-tct_colbert-v2-msmarco-passage-dev-subset-20210608-5f341b.tar.gz",
        ],
        "md5": "ee8d76e596aef02c5027a2ffd0ff66f8",
        "size (bytes)": 20072992,
        "total_queries": 6980,
        "downloaded": False,
    },
    "tct_colbert-v2-hn-msmarco-passage-dev-subset": {
        "description": "MS MARCO passage dev set queries encoded by TCT-ColBERT V2 HN",
        "urls": [
            "https://github.com/castorini/pyserini-data/raw/main/encoded-queries/query-embedding-tct_colbert-v2-hn-msmarco-passage-dev-subset-20210608-5f341b.tar.gz",
        ],
        "md5": "f7e39cf2cd3ee53f7f8f2e0a1821431c",
        "size (bytes)": 20074411,
        "total_queries": 6980,
        "downloaded": False,
    },
    "tct_colbert-v2-hnp-msmarco-passage-dev-subset": {
        "description": "MS MARCO passage dev set queries encoded by TCT-ColBERT V2 HN+",
        "urls": [
            "https://github.com/castorini/pyserini-data/raw/main/encoded-queries/query-embedding-tct_colbert-v2-hnp-msmarco-passage-dev-subset-20210608-5f341b.tar.gz",
        ],
        "md5": "bed8036475774d12915c8af2a44612f4",
        "size (bytes)": 20078958,
        "total_queries": 6980,
        "downloaded": False,
    },
    "tct_colbert-v2-hnp-dl19-passage": {
        "description": "TREC DL19-passage queries encoded by TCT-ColBERT V2 HN+",
        "urls": [
            "https://github.com/castorini/pyserini-data/raw/main/encoded-queries/query-embedding-tct_colbert-v2-hnp-dl19-passage-20230124-99b795.tar.gz",
        ],
        "md5": "ee945fb0a5b17cba4e2e5d51318fbe05",
        "size (bytes)": 125193,
        "total_queries": 43,
        "downloaded": False,
    },
    "tct_colbert-v2-hnp-dl20": {
        "description": "TREC DL20 queries encoded by TCT-ColBERT V2 HN+",
        "urls": [
            "https://github.com/castorini/pyserini-data/raw/main/encoded-queries/query-embedding-tct_colbert-v2-hnp-dl20-passage-20230124-99b795.tar.gz",
        ],
        "md5": "b940d3d38cf5a50a9467a4aa7a59d226",
        "size (bytes)": 577645,
        "total_queries": 200,
        "downloaded": False,
    },
    "ance-msmarco-passage-dev-subset": {
        "description": "MS MARCO passage dev set queries encoded by ANCE",
        "urls": [
            "https://github.com/castorini/pyserini-data/raw/main/encoded-queries/query-embedding-ance-msmarco-passage-dev-subset-20210419-9323ec.tar.gz",
        ],
        "md5": "adad81bb1495eff2f0463e809ecc01b8",
        "size (bytes)": 19965095,
        "total_queries": 6980,
        "downloaded": False,
    },
    # NOTE(review): the ance-dl19/dl20 and distilbert_kd-dl19/dl20 archive
    # names end in "99b79", while the other 20230124 archives in this table
    # end in "99b795". Left untouched; confirm against the hosted file names
    # before changing.
    "ance-dl19-passage": {
        "description": "TREC DL19 passage queries encoded by ANCE",
        "urls": [
            "https://github.com/castorini/pyserini-data/raw/main/encoded-queries/query-embedding-ance-dl19-passage-20230124-99b79.tar.gz",
        ],
        "md5": "828714ef5481dc49686e14b61881ba06",
        "size (bytes)": 124468,
        "total_queries": 43,
        "downloaded": False,
    },
    "ance-dl20": {
        "description": "TREC DL20 queries encoded by ANCE",
        "urls": [
            "https://github.com/castorini/pyserini-data/raw/main/encoded-queries/query-embedding-ance-dl20-passage-20230124-99b79.tar.gz",
        ],
        "md5": "79acea9812a5c20d0d0817b07b348d15",
        "size (bytes)": 574183,
        "total_queries": 200,
        "downloaded": False,
    },
    "tct_colbert-msmarco-doc-dev": {
        "description": "MS MARCO Document dev set queries encoded by TCT-ColBERT zero-shot",
        "urls": [
            "https://github.com/castorini/pyserini-data/raw/main/encoded-queries/query-embedding-tct_colbert-msmarco-doc-dev-20210419-9323ec.tar.gz",
        ],
        "md5": "565fe57f92b229643b68fa3263f089a9",
        "size (bytes)": 14940124,
        "total_queries": 6980,
        "downloaded": False,
    },
    "ance_maxp-msmarco-doc-dev": {
        "description": "MS MARCO Document dev set queries encoded by ANCE maxp",
        "urls": [
            "https://github.com/castorini/pyserini-data/raw/main/encoded-queries/query-embedding-ance_maxp-msmarco-doc-dev-20210419-9323ec.tar.gz",
        ],
        "md5": "3d41ae797cb97e42649c4f4fa7b97d56",
        "size (bytes)": 14854155,
        "total_queries": 6980,
        "downloaded": False,
    },
    "sbert-msmarco-passage-dev-subset": {
        "description": "MS MARCO passage dev set queries encoded by SBERT",
        "urls": [
            "https://github.com/castorini/pyserini-data/raw/main/encoded-queries/query-embedding-sbert-msmarco-passage-dev-subset-20210419-9323ec.tar.gz",
        ],
        "md5": "dc0d09a0f5803824c1ad46a39417aa1e",
        "size (bytes)": 20058701,
        "total_queries": 6980,
        "downloaded": False,
    },
    # The three distilbert_kd descriptions previously said "SBERT", copied
    # from the entry above; they now name the encoder their keys and archive
    # names refer to.
    "distilbert_kd-msmarco-passage-dev-subset": {
        "description": "MS MARCO passage dev set queries encoded by DistilBERT KD",
        "urls": [
            "https://github.com/castorini/pyserini-data/raw/main/encoded-queries/query-embedding-distilbert_kd-msmarco-passage-dev-subset-20210419-9323ec.tar.gz",
        ],
        "md5": "4706ec91183eefa9771e9311fe4799e0",
        "size (bytes)": 20013009,
        "total_queries": 6980,
        "downloaded": False,
    },
    "distilbert_kd-dl19-passage": {
        "description": "TREC DL19 passage queries encoded by DistilBERT KD",
        "urls": [
            "https://github.com/castorini/pyserini-data/raw/main/encoded-queries/query-embedding-distilbert_kd-dl19-passage-20230124-99b79.tar.gz",
        ],
        "md5": "c9fe8c8112a7d4fcda1aa606af77e66a",
        "size (bytes)": 124760,
        "total_queries": 43,
        "downloaded": False,
    },
    "distilbert_kd-dl20": {
        "description": "TREC DL20 queries encoded by DistilBERT KD",
        "urls": [
            "https://github.com/castorini/pyserini-data/raw/main/encoded-queries/query-embedding-distilbert_kd-dl20-passage-20230124-99b79.tar.gz",
        ],
        "md5": "09fe19984515145a78183a98e44bd699",
        "size (bytes)": 575682,
        "total_queries": 200,
        "downloaded": False,
    },
    "distilbert_tas_b-msmarco-passage-dev-subset": {
        "description": "MS MARCO passage dev set queries encoded by TAS-B",
        "urls": [
            "https://github.com/castorini/pyserini-data/raw/main/encoded-queries/query-embedding-distilbert_dot_tas_b_b256-msmarco-passage-dev-subset-20210527-63276f.tar.gz",
        ],
        "md5": "17a3f81de7ba497728050b83733b1c46",
        "size (bytes)": 20016799,
        "total_queries": 6980,
        "downloaded": False,
    },
    "distilbert_tas_b-dl19-passage": {
        "description": "TREC DL19 passage queries encoded by TAS-B",
        "urls": [
            "https://github.com/castorini/pyserini-data/raw/main/encoded-queries/query-embedding-distilbert_dot_tas_b_b256-dl19-passage-20230124-99b795.tar.gz",
        ],
        "md5": "a0a23a1be77e6e9e5dfacf32dfcd5e9b",
        "size (bytes)": 124809,
        "total_queries": 43,
        "downloaded": False,
    },
    "distilbert_tas_b-dl20": {
        "description": "TREC DL20 queries encoded by TAS-B",
        "urls": [
            "https://github.com/castorini/pyserini-data/raw/main/encoded-queries/query-embedding-distilbert_dot_tas_b_b256-dl20-passage-20230124-99b795.tar.gz",
        ],
        "md5": "8ffb4d5a17a2c028fb5065ef8a394ab3",
        "size (bytes)": 575875,
        "total_queries": 200,
        "downloaded": False,
    },
    "dpr_multi-nq-dev": {
        "description": "Natural Question dev set questions encoded by DPR question encoder trained on multiset",
        "urls": [
            "https://github.com/castorini/pyserini-data/raw/main/encoded-queries/query-embedding-dpr_multi-nq-dev-20210419-9323ec.tar.gz",
        ],
        "md5": "c2fd32438129e4994ce2ce71e08de875",
        "size (bytes)": 25129398,
        "total_queries": 8757,
        "downloaded": False,
    },
    "dpr_multi-nq-test": {
        "description": "Natural Question test set questions encoded by DPR question encoder trained on multiset",
        "urls": [
            "https://github.com/castorini/pyserini-data/raw/main/encoded-queries/query-embedding-dpr_multi-nq-test-20210419-9323ec.tar.gz",
        ],
        "md5": "1791f1ed078beb3a00847f75023eb020",
        "size (bytes)": 10365005,
        "total_queries": 3610,
        "downloaded": False,
    },
    "ance_multi-nq-dev": {
        "description": "Natural Question dev set questions encoded by ANCE question encoder trained on multiset",
        "urls": [
            "https://github.com/castorini/pyserini-data/raw/main/encoded-queries/query-embedding-ance_multi-nq-dev-20210419-9323ec.tar.gz",
        ],
        "md5": "a3ed32ec8d5a474f61e3c3a9968b26fd",
        "size (bytes)": 25163934,
        "total_queries": 8757,
        "downloaded": False,
    },
    "ance_multi-nq-test": {
        "description": "Natural Question test set questions encoded by ANCE question encoder trained on multiset",
        "urls": [
            "https://github.com/castorini/pyserini-data/raw/main/encoded-queries/query-embedding-ance_multi-nq-test-20210419-9323ec.tar.gz",
        ],
        "md5": "a356202b7c8f73758732c893a76a8005",
        "size (bytes)": 10379384,
        "total_queries": 3610,
        "downloaded": False,
    },
    "dpr_multi-trivia-dev": {
        "description": "TriviaQA dev set questions encoded by DPR question encoder trained on multiset",
        "urls": [
            "https://github.com/castorini/pyserini-data/raw/main/encoded-queries/query-embedding-dpr_multi-trivia-dev-20210419-9323ec.tar.gz",
        ],
        "md5": "efac7b71ef52ca073331e896089456a4",
        "size (bytes)": 25517034,
        "total_queries": 8837,
        "downloaded": False,
    },
    "dpr_multi-trivia-test": {
        "description": "TriviaQA test set questions encoded by DPR question encoder trained on multiset",
        "urls": [
            "https://github.com/castorini/pyserini-data/raw/main/encoded-queries/query-embedding-dpr_multi-trivia-test-20210419-9323ec.tar.gz",
        ],
        "md5": "01e95455d55d0495d806549f04a02c24",
        "size (bytes)": 32664437,
        "total_queries": 11313,
        "downloaded": False,
    },
    "ance_multi-trivia-dev": {
        "description": "TriviaQA dev set questions encoded by ANCE question encoder trained on multiset",
        "urls": [
            "https://github.com/castorini/pyserini-data/raw/main/encoded-queries/query-embedding-ance_multi-trivia-dev-20210419-9323ec.tar.gz",
        ],
        "md5": "bd88499a5785b15ba702173cc0e91417",
        "size (bytes)": 25559775,
        "total_queries": 8837,
        "downloaded": False,
    },
    "ance_multi-trivia-test": {
        "description": "TriviaQA test set questions encoded by ANCE question encoder trained on multiset",
        "urls": [
            "https://github.com/castorini/pyserini-data/raw/main/encoded-queries/query-embedding-ance_multi-trivia-test-20210419-9323ec.tar.gz",
        ],
        "md5": "3844dfb7f8feb6b064fa48775a35c6ee",
        "size (bytes)": 32717910,
        "total_queries": 11313,
        "downloaded": False,
    },
    "dpr_multi-wq-test": {
        "description": "Web Questions test set questions encoded by DPR question encoder trained on multiset",
        "urls": [
            "https://github.com/castorini/pyserini-data/raw/main/encoded-queries/query-embedding-dpr_multi-wq-test-20210419-9323ec.tar.gz",
        ],
        "md5": "19aa721632d05afe031cc2da83a9a5a5",
        "size (bytes)": 5826854,
        "total_queries": 2032,
        "downloaded": False,
    },
    # NOTE(review): key says "test" but the description says "dev set";
    # left as-is because this may be intentional - confirm with the data owners.
    "dpr_multi-squad-test": {
        "description": "SQUAD dev set questions encoded by DPR question encoder trained on multiset",
        "urls": [
            "https://github.com/castorini/pyserini-data/raw/main/encoded-queries/query-embedding-dpr_multi-squad-test-20210419-9323ec.tar.gz",
        ],
        "md5": "d11e0f801a488d51ad2a63b0748f4ae0",
        "size (bytes)": 30328268,
        "total_queries": 10570,
        "downloaded": False,
    },
    "dpr_multi-curated-test": {
        "description": "CuratedTREC test set questions encoded by DPR question encoder trained on multiset",
        "urls": [
            "https://github.com/castorini/pyserini-data/raw/main/encoded-queries/query-embedding-dpr_multi-curated-test-20210419-9323ec.tar.gz",
        ],
        "md5": "d1737d3ec5a080d93350ae76b02c7fd1",
        "size (bytes)": 1995280,
        "total_queries": 694,
        "downloaded": False,
    },
    "dpr_single_nq-nq-dev": {
        "description": "NQ dev set questions encoded by DPR question encoder trained on NQ dataset",
        "urls": [
            "https://github.com/castorini/pyserini-data/raw/main/encoded-queries/query-embedding-dpr_single_nq-nq-dev-20210419-9323ec.tar.gz",
        ],
        "md5": "1a992f8d5336dc8654bba5ab7e375ebe",
        "size (bytes)": 25123288,
        "total_queries": 8757,
        "downloaded": False,
    },
    "dpr_single_nq-nq-test": {
        "description": "NQ test set questions encoded by DPR question encoder trained on NQ dataset",
        "urls": [
            "https://github.com/castorini/pyserini-data/raw/main/encoded-queries/query-embedding-dpr_single_nq-nq-test-20210419-9323ec.tar.gz",
        ],
        "md5": "e64bb009b6ba8bfe40d4b9967fd69240",
        "size (bytes)": 10362252,
        "total_queries": 3610,
        "downloaded": False,
    },
    "bpr_single_nq-nq-test": {
        "description": "NQ test set questions encoded by BPR question encoder trained on NQ dataset",
        "urls": [
            "https://github.com/castorini/pyserini-data/raw/main/encoded-queries/query-embedding-bpr_single_nq-nq-test-20210827-8a8f75.tar.gz",
        ],
        "md5": "b139d5a096ad52d2abc66fb54ec66158",
        "size (bytes)": 11094680,
        "total_queries": 3610,
        "downloaded": False,
    },
    "dkrr-dpr-nq-retriever-dpr-nq-dev": {
        "description": "DPR-NQ dev set questions encoded by castorini/dkrr-dpr-nq-retriever trained on NQ dataset",
        "urls": [
            "https://github.com/castorini/pyserini-data/raw/main/encoded-queries/query-embedding-dkrr-dpr-nq-retriever-dpr-nq-dev-20220304-7ffa54.tar.gz",
        ],
        "md5": "fe1276ae841bd5be6f3e0daac144273a",
        "size (bytes)": 25146740,
        "total_queries": 8757,
        "downloaded": False,
    },
    "dkrr-dpr-nq-retriever-dpr-nq-test": {
        "description": "DPR-NQ test set questions encoded by castorini/dkrr-dpr-nq-retriever trained on NQ dataset",
        "urls": [
            "https://github.com/castorini/pyserini-data/raw/main/encoded-queries/query-embedding-dkrr-dpr-nq-retriever-dpr-nq-test-20220304-7ffa54.tar.gz",
        ],
        "md5": "6c7793a0a89e7d10309a6973c52de326",
        "size (bytes)": 10370414,
        "total_queries": 3610,
        "downloaded": False,
    },
    "dkrr-dpr-nq-retriever-nq-dev": {
        "description": "NQ dev set questions encoded by castorini/dkrr-dpr-nq-retriever trained on NQ dataset",
        "urls": [
            "https://github.com/castorini/pyserini-data/raw/main/encoded-queries/query-embedding-dkrr-dpr-nq-retriever-nq-dev-20220304-7ffa54.tar.gz",
        ],
        "md5": "3c84c7fb6569d7690d5c38be61d3a5a4",
        "size (bytes)": 25146526,
        "total_queries": 8757,
        "downloaded": False,
    },
    "dkrr-dpr-nq-retriever-nq-test": {
        "description": "NQ test set questions encoded by castorini/dkrr-dpr-nq-retriever trained on NQ dataset",
        "urls": [
            "https://github.com/castorini/pyserini-data/raw/main/encoded-queries/query-embedding-dkrr-dpr-nq-retriever-nq-test-20220304-7ffa54.tar.gz",
        ],
        "md5": "cd3c30fc6dfde160983167b59acb17a3",
        "size (bytes)": 10370264,
        "total_queries": 3610,
        "downloaded": False,
    },
    "dkrr-dpr-tqa-retriever-dpr-tqa-dev": {
        "description": "TriviaQA dev set questions encoded by castorini/dkrr-dpr-tqa-retriever trained on TriviaQA dataset",
        "urls": [
            "https://github.com/castorini/pyserini-data/raw/main/encoded-queries/query-embedding-dkrr-dpr-tqa-retriever-tqa-dev-20220304-7ffa54.tar.gz",
        ],
        "md5": "f9ca5060cf7794b681cd4fe3d3708c4d",
        "size (bytes)": 25540932,
        "total_queries": 8837,
        "downloaded": False,
    },
    "dkrr-dpr-tqa-retriever-dpr-tqa-test": {
        "description": "TriviaQA test set questions encoded by castorini/dkrr-dpr-tqa-retriever trained on TriviaQA dataset",
        "urls": [
            "https://github.com/castorini/pyserini-data/raw/main/encoded-queries/query-embedding-dkrr-dpr-tqa-retriever-tqa-test-20220304-7ffa54.tar.gz",
        ],
        "md5": "9cbd030c3a4478b7eb8356844bacc45b",
        "size (bytes)": 32688909,
        "total_queries": 11313,
        "downloaded": False,
    },
    "wiki-6-3-all-dpr2-multi-nq-test": {
        "description": "NQ test set questions encoded by castorini/wiki-all-6-3-multi-dpr2-query-encoder.",
        "urls": [
            "https://github.com/castorini/pyserini-data/raw/main/encoded-queries/query-embedding-wiki-all-6-3-dpr2-multi-retriever-nq-test-20230103-186fa7.tar.gz",
        ],
        "md5": "2632ca1392a33e975d505acd5090250a",
        "size (bytes)": 10354577,
        "total_queries": 3610,
        "downloaded": False,
    },
    "wiki-6-3-all-dpr2-multi-dpr-trivia-test": {
        "description": "TriviaQA test set questions encoded by castorini/wiki-all-6-3-multi-dpr2-query-encoder.",
        "urls": [
            "https://github.com/castorini/pyserini-data/raw/main/encoded-queries/query-embedding-wiki-all-6-3-dpr2-multi-retriever-dpr-trivia-test-20230103-186fa7.tar.gz",
        ],
        "md5": "d0abf8ff598daaec35acd972a465b0e2",
        "size (bytes)": 32620950,
        "total_queries": 11313,
        "downloaded": False,
    },
    "openai-ada2-dl19-passage": {
        "description": "TREC DL19 passage queries encoded by OpenAI ada2.",
        "urls": [
            "https://github.com/castorini/pyserini-data/raw/main/encoded-queries/query-embedding-openai-ada2-dl19-passage-20230530-e3a58f.tar.gz",
        ],
        "md5": "ab57dab62c5b43508c661b78d6f7b6b9",
        "size (bytes)": 418940,
        "total_queries": 43,
        "downloaded": False,
    },
    "openai-ada2-dl20": {
        "description": "TREC DL20 passage queries encoded by OpenAI ada2.",
        "urls": [
            "https://github.com/castorini/pyserini-data/raw/main/encoded-queries/query-embedding-openai-ada2-dl20-passage-20230530-e3a58f.tar.gz",
        ],
        "md5": "fe711c1e146647396fd06f125882d01c",
        "size (bytes)": 1939404,
        "total_queries": 200,
        "downloaded": False,
    },
    "openai-ada2-dl19-passage-hyde": {
        "description": "TREC DL19 passage queries encoded by HyDE-OpenAI ada2.",
        "urls": [
            "https://github.com/castorini/pyserini-data/raw/main/encoded-queries/query-embedding-openai-ada2-hyde-dl19-passage-20230530-e3a58f.tar.gz",
        ],
        "md5": "bc981187dc18f3fbf21698605e2349b5",
        "size (bytes)": 508400,
        "total_queries": 43,
        "downloaded": False,
    },
    "openai-ada2-dl20-hyde": {
        "description": "TREC DL20 passage queries encoded by HyDE-OpenAI ada2.",
        "urls": [
            "https://github.com/castorini/pyserini-data/raw/main/encoded-queries/query-embedding-openai-ada2-hyde-dl20-passage-20230530-e3a58f.tar.gz",
        ],
        "md5": "12389d6affdab9231996834f7022beab",
        "size (bytes)": 645105,
        "total_queries": 200,
        "downloaded": False,
    },
    "openai-ada2-msmarco-passage-dev-subset": {
        "description": "MS MARCO passage dev set queries encoded by OpenAI ada2.",
        "urls": [
            "https://github.com/castorini/pyserini-data/raw/main/encoded-queries/query-embedding-openai-ada2-msmarco-passage-dev-subset-20230530-e3a58f.tar.gz",
        ],
        "md5": "0d9c7311e2e3819183d7ae2b4889e4ba",
        "size (bytes)": 67615770,
        "total_queries": 6980,
        "downloaded": False,
    },
}