jinaai
/

jina-colbert-v1-en

@@ -50,20 +50,21 @@ n_gpu: int = 1  # Set your number of available GPUs
 experiment: str = ""  # Name of the folder where the logs and created indices will be stored
 index_name: str = ""  # The name of your index, i.e. the name of your vector database
-with Run().context(RunConfig(nranks=n_gpu, experiment=experiment)):
-    config = ColBERTConfig(
-      doc_maxlen=8192  # Our model supports 8k context length for indexing long documents
-    )
-    indexer = Indexer(
-      checkpoint="jinaai/jina-colbert-v1-en",
-      config=config,
-    )
-    documents = [
-      "ColBERT is an efficient and effective passage retrieval model.",
-      "Jina-ColBERT is a ColBERT-style model but based on JinaBERT so it can support both 8k context length.",
-      ...
-    ]
-    indexer.index(name=index_name, collection=documents)
 ```
 ### Searching
@@ -77,17 +78,18 @@ experiment: str = ""  # Name of the folder where the logs and created indices wi
 index_name: str = ""  # Name of your previously created index where the documents you want to search are stored.
 k: int = 10  # how many results you want to retrieve
-with Run().context(RunConfig(nranks=n_gpu, experiment=experiment)):
-    config = ColBERTConfig(
-      query_maxlen=128  # Although the model supports 8k context length, we suggest not to use a very long query, as it may cause significant computational complexity and CUDA memory usage.
-    )
-    searcher = Searcher(
-      index=index_name,
-      config=config
-    )  # You don't need to specify the checkpoint again, the model name is stored in the index.
-    query = "How to use ColBERT for indexing long documents?"
-    results = searcher.search(query, k=k)
-    # results: tuple of tuples of length k containing ((passage_id, passage_rank, passage_score), ...)
 ```
 ## Evaluation Results

 experiment: str = ""  # Name of the folder where the logs and created indices will be stored
 index_name: str = ""  # The name of your index, i.e. the name of your vector database
+if __name__ == "__main__":
+    with Run().context(RunConfig(nranks=n_gpu, experiment=experiment)):
+        config = ColBERTConfig(
+          doc_maxlen=8192  # Our model supports 8k context length for indexing long documents
+        )
+        indexer = Indexer(
+          checkpoint="jinaai/jina-colbert-v1-en",
+          config=config,
+        )
+        documents = [
+          "ColBERT is an efficient and effective passage retrieval model.",
+          "Jina-ColBERT is a ColBERT-style model but based on JinaBERT so it can support both 8k context length.",
+          # Add more documents here to ensure the clustering works correctly
+        ]
+        indexer.index(name=index_name, collection=documents)
 ```
 ### Searching
 index_name: str = ""  # Name of your previously created index where the documents you want to search are stored.
 k: int = 10  # how many results you want to retrieve
+if __name__ == "__main__":
+    with Run().context(RunConfig(nranks=n_gpu, experiment=experiment)):
+        config = ColBERTConfig(
+          query_maxlen=128  # Although the model supports 8k context length, we recommend against very long queries, as they can significantly increase computational cost and CUDA memory usage.
+        )
+        searcher = Searcher(
+          index=index_name,
+          config=config
+        )  # You don't need to specify the checkpoint again; the model name is stored in the index.
+        query = "How to use ColBERT for indexing long documents?"
+        results = searcher.search(query, k=k)
+        # results: tuple of tuples of length k containing ((passage_id, passage_rank, passage_score), ...)
 ```
 ## Evaluation Results