selimc/turkish-colpali

Model card · Files and versions · Metrics · Training metrics · Community

selimc committed 20 days ago

Commit

7eafde0

·

verified ·

1 Parent(s): 55f9142

Update README.md

Files changed (1) hide show

README.md +16 -6

README.md CHANGED Viewed

@@ -48,7 +48,11 @@ The training data was created via the following steps:
 The complete code for fine-tuning, testing, and creating similarity maps can be found in the [turkish-colpali GitHub repository](https://github.com/selimcavas/turkish-colpali). All notebooks in the repository are in Turkish to better serve the Turkish NLP community.
 ```python
 import torch
 from PIL import Image
 from transformers import ColPaliForRetrieval, ColPaliProcessor
@@ -61,6 +65,13 @@ model = ColPaliForRetrieval.from_pretrained(
 processor = ColPaliProcessor.from_pretrained(model_name)
 # Your inputs
 images: List[Image.Image] = [
     load_image_from_url(
@@ -77,24 +88,23 @@ images: List[Image.Image] = [
     ),
 ]
-queries = [
     "ekonomiyi düzeltme çabaları demir yolları gelir gider grafik",
     "bitkilerin yapısı bitkisel dokular meristem doku",
     "besin grupları tablosu karbonhidratlar",
     "Türk milli mücadelesi emperyalizm Atatürk görseli"
 ]
-# Process the inputs
 batch_images = processor(images=images).to(model.device)
 batch_queries = processor(text=queries).to(model.device)
 # Forward pass
 with torch.no_grad():
-    image_embeddings = model(**batch_images)
-    query_embeddings = model(**batch_queries)
-# Score the queries against the images
-scores = processor.score_retrieval(query_embeddings, image_embeddings)
 scores

 The complete code for fine-tuning, testing, and creating similarity maps can be found in the [turkish-colpali GitHub repository](https://github.com/selimcavas/turkish-colpali). All notebooks in the repository are in Turkish to better serve the Turkish NLP community.
 ```python
+from io import BytesIO
+from typing import List
+import requests
 import torch
+from IPython.display import display
 from PIL import Image
 from transformers import ColPaliForRetrieval, ColPaliProcessor
 processor = ColPaliProcessor.from_pretrained(model_name)
+def load_image_from_url(url: str) -> Image.Image:
+    """
+    Load a PIL image from a valid URL.
+    """
+    response = requests.get(url)
+    return Image.open(BytesIO(response.content))
 # Your inputs
 images: List[Image.Image] = [
     load_image_from_url(
     ),
 ]
+queries: List[str] = [
     "ekonomiyi düzeltme çabaları demir yolları gelir gider grafik",
     "bitkilerin yapısı bitkisel dokular meristem doku",
     "besin grupları tablosu karbonhidratlar",
     "Türk milli mücadelesi emperyalizm Atatürk görseli"
 ]
+# Preprocess inputs
 batch_images = processor(images=images).to(model.device)
 batch_queries = processor(text=queries).to(model.device)
 # Forward pass
 with torch.no_grad():
+    image_embeddings = model(**batch_images).embeddings
+    query_embeddings = model(**batch_queries).embeddings
+scores = processor.score_retrieval(query_embeddings, image_embeddings)  # (n_queries, n_images)
 scores