henryhyunwookim
committed on
Commit
•
b47611f
1
Parent(s):
70c8cf6
Upload 15 files
Browse files- .gitattributes +1 -0
- .gitignore +2 -0
- README.md +1 -13
- app.py +51 -0
- data/data_set.pkl +3 -0
- log/20240530.log +74 -0
- notebook_1.ipynb +0 -0
- notebook_2.ipynb +1148 -0
- notebook_3.ipynb +0 -0
- notebook_4.ipynb +0 -0
- poetry.lock +0 -0
- pyproject.toml +29 -0
- requirements.txt +15 -0
- utils/__pycache__/utils.cpython-311.pyc +0 -0
- utils/utils.py +154 -0
- vectore_storage/chroma/chroma.sqlite3 +3 -0
.gitattributes
CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
36 |
+
vectore_storage/chroma/chroma.sqlite3 filter=lfs diff=lfs merge=lfs -text
|
.gitignore
ADDED
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
1 |
+
log/
|
2 |
+
__pycache__
|
README.md
CHANGED
@@ -1,13 +1 @@
|
|
1 |
-
|
2 |
-
title: GTA Multimodal RAG
|
3 |
-
emoji: π
|
4 |
-
colorFrom: pink
|
5 |
-
colorTo: green
|
6 |
-
sdk: gradio
|
7 |
-
sdk_version: 4.32.0
|
8 |
-
app_file: app.py
|
9 |
-
pinned: false
|
10 |
-
license: mit
|
11 |
-
---
|
12 |
-
|
13 |
-
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
|
|
1 |
+
# GrandTheftAuto-multimodal-RAG-application
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
app.py
ADDED
@@ -0,0 +1,51 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from utils.utils import get_logger, initialization, get_result
|
2 |
+
import gradio as gr
|
3 |
+
import logging
|
4 |
+
|
5 |
+
|
6 |
+
logger = get_logger()
|
7 |
+
collection = None
|
8 |
+
|
9 |
+
|
10 |
+
def main(query):
    """Handle one search request coming from the Gradio interface.

    Args:
        query: Text entered by the user. The exact string "exit" is treated
            as a request to end the search instead of a search term.

    Returns:
        A two-item tuple lining up with the interface's ``["image", "text"]``
        outputs: ``(None, "Search terminated.")`` when *query* is "exit",
        otherwise the ``(image, text)`` pair produced by ``get_result``.
    """
    # Local name so the module-level ``logger`` is not shadowed.
    log = logging.getLogger(__name__)
    separator = "-------------------------------------------------------"

    print("Starting search...")
    log.info("Starting search...")
    print(separator)
    log.info(separator)

    # Each call serves exactly one query, so no loop is needed: both
    # branches return immediately.
    if query == "exit":
        print(separator)
        log.info(separator)
        print("Search terminated.")
        log.info("Search terminated.")
        return None, "Search terminated."

    # ``collection``, ``data_set`` and ``model`` are module-level globals that
    # the start-up code assigns before the interface is launched.
    # Get search result including the original descriptions of the images.
    image, text = get_result(collection, data_set, query, model, n_results=2)
    return image, text
|
35 |
+
|
36 |
+
|
37 |
+
if __name__ == "__main__":
    # Script entry point: prepare the search resources, then serve the
    # Gradio interface that forwards each text query to ``main``.
    try:
        # ``None`` is the "not initialised yet" sentinel; compare by identity
        # so the check never depends on how the collection defines ``==``.
        if collection is None:
            collection, data_set, model, logger = initialization(logger)
        app = gr.Interface(
            fn=main,
            inputs=["text"],
            outputs=["image", "text"],
            title="Search for a scene in the world of GTA!",
        )
        app.launch(share=True)
    except Exception as e:
        # Record the failure with its traceback, then re-raise the same
        # exception unchanged so the process still fails visibly.
        logger.exception(e)
        raise
|
data/data_set.pkl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b3b259221c8f9df17cde27c4e0913b9ce110c768910ff81e40c3db443196b68c
|
3 |
+
size 3229
|
log/20240530.log
ADDED
@@ -0,0 +1,74 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
2024-05-30 15:47:21 INFO Initializing...
|
2 |
+
2024-05-30 15:47:21 INFO -------------------------------------------------------
|
3 |
+
2024-05-30 15:47:21 INFO Importing functions...
|
4 |
+
2024-05-30 15:47:29 INFO Set directories...
|
5 |
+
2024-05-30 15:47:29 INFO Loading data...
|
6 |
+
2024-05-30 15:47:30 INFO Loading CLIP model...
|
7 |
+
2024-05-30 15:47:30 INFO Load pretrained SentenceTransformer: sentence-transformers/clip-ViT-L-14
|
8 |
+
2024-05-30 15:47:34 INFO Use pytorch device_name: cpu
|
9 |
+
2024-05-30 15:47:34 INFO Getting vector embeddings...
|
10 |
+
2024-05-30 15:48:33 INFO Anonymized telemetry enabled. See https://docs.trychroma.com/telemetry for more information.
|
11 |
+
2024-05-30 15:48:33 INFO Collection image_vectors is not created.
|
12 |
+
2024-05-30 15:48:34 INFO -------------------------------------------------------
|
13 |
+
2024-05-30 15:48:34 INFO Initialization completed! Ready for search.
|
14 |
+
2024-05-30 15:48:35 INFO HTTP Request: GET https://checkip.amazonaws.com/ "HTTP/1.1 200 "
|
15 |
+
2024-05-30 15:48:35 INFO HTTP Request: GET http://127.0.0.1:7860/startup-events "HTTP/1.1 200 OK"
|
16 |
+
2024-05-30 15:48:35 INFO HTTP Request: HEAD http://127.0.0.1:7860/ "HTTP/1.1 200 OK"
|
17 |
+
2024-05-30 15:48:36 INFO HTTP Request: GET https://api.gradio.app/pkg-version "HTTP/1.1 200 OK"
|
18 |
+
2024-05-30 15:48:36 INFO HTTP Request: GET https://api.gradio.app/v2/tunnel-request "HTTP/1.1 200 OK"
|
19 |
+
2024-05-30 15:49:30 INFO Starting search...
|
20 |
+
2024-05-30 15:49:30 INFO -------------------------------------------------------
|
21 |
+
2024-05-30 15:52:06 INFO Starting search...
|
22 |
+
2024-05-30 15:52:06 INFO -------------------------------------------------------
|
23 |
+
2024-05-30 15:53:34 INFO Starting search...
|
24 |
+
2024-05-30 15:53:34 INFO -------------------------------------------------------
|
25 |
+
2024-05-30 15:53:34 INFO -------------------------------------------------------
|
26 |
+
2024-05-30 15:53:34 INFO Search terminated.
|
27 |
+
2024-05-30 15:54:41 INFO Initializing...
|
28 |
+
2024-05-30 15:54:41 INFO -------------------------------------------------------
|
29 |
+
2024-05-30 15:54:41 INFO Importing functions...
|
30 |
+
2024-05-30 15:54:50 INFO Set directories...
|
31 |
+
2024-05-30 15:54:50 INFO Loading data...
|
32 |
+
2024-05-30 15:54:51 INFO Loading CLIP model...
|
33 |
+
2024-05-30 15:54:51 INFO Load pretrained SentenceTransformer: sentence-transformers/clip-ViT-L-14
|
34 |
+
2024-05-30 15:54:55 INFO Use pytorch device_name: cpu
|
35 |
+
2024-05-30 15:54:55 INFO Getting vector embeddings...
|
36 |
+
2024-05-30 15:55:59 INFO Anonymized telemetry enabled. See https://docs.trychroma.com/telemetry for more information.
|
37 |
+
2024-05-30 15:56:00 INFO Collection image_vectors is not created.
|
38 |
+
2024-05-30 15:56:01 INFO -------------------------------------------------------
|
39 |
+
2024-05-30 15:56:01 INFO Initialization completed! Ready for search.
|
40 |
+
2024-05-30 15:56:02 INFO HTTP Request: GET https://checkip.amazonaws.com/ "HTTP/1.1 200 "
|
41 |
+
2024-05-30 15:56:02 INFO HTTP Request: GET http://127.0.0.1:7860/startup-events "HTTP/1.1 200 OK"
|
42 |
+
2024-05-30 15:56:02 INFO HTTP Request: HEAD http://127.0.0.1:7860/ "HTTP/1.1 200 OK"
|
43 |
+
2024-05-30 15:56:02 INFO HTTP Request: GET https://api.gradio.app/pkg-version "HTTP/1.1 200 OK"
|
44 |
+
2024-05-30 15:56:03 INFO HTTP Request: GET https://api.gradio.app/v2/tunnel-request "HTTP/1.1 200 OK"
|
45 |
+
2024-05-30 15:56:34 INFO Starting search...
|
46 |
+
2024-05-30 15:56:34 INFO -------------------------------------------------------
|
47 |
+
2024-05-30 15:57:55 INFO Starting search...
|
48 |
+
2024-05-30 15:57:55 INFO -------------------------------------------------------
|
49 |
+
2024-05-30 15:57:55 INFO -------------------------------------------------------
|
50 |
+
2024-05-30 15:57:55 INFO Search terminated.
|
51 |
+
2024-05-30 16:11:29 INFO Initializing...
|
52 |
+
2024-05-30 16:11:29 INFO -------------------------------------------------------
|
53 |
+
2024-05-30 16:11:29 INFO Importing functions...
|
54 |
+
2024-05-30 16:11:37 INFO Set directories...
|
55 |
+
2024-05-30 16:11:37 INFO Loading data...
|
56 |
+
2024-05-30 16:11:38 INFO Loading CLIP model...
|
57 |
+
2024-05-30 16:11:38 INFO Load pretrained SentenceTransformer: sentence-transformers/clip-ViT-L-14
|
58 |
+
2024-05-30 16:11:42 INFO Use pytorch device_name: cpu
|
59 |
+
2024-05-30 16:11:42 INFO Getting vector embeddings...
|
60 |
+
2024-05-30 16:12:38 INFO Anonymized telemetry enabled. See https://docs.trychroma.com/telemetry for more information.
|
61 |
+
2024-05-30 16:12:38 INFO Collection image_vectors is not created.
|
62 |
+
2024-05-30 16:12:39 INFO -------------------------------------------------------
|
63 |
+
2024-05-30 16:12:39 INFO Initialization completed! Ready for search.
|
64 |
+
2024-05-30 16:12:40 INFO HTTP Request: GET https://checkip.amazonaws.com/ "HTTP/1.1 200 "
|
65 |
+
2024-05-30 16:12:40 INFO HTTP Request: GET http://127.0.0.1:7860/startup-events "HTTP/1.1 200 OK"
|
66 |
+
2024-05-30 16:12:40 INFO HTTP Request: HEAD http://127.0.0.1:7860/ "HTTP/1.1 200 OK"
|
67 |
+
2024-05-30 16:12:40 INFO HTTP Request: GET https://api.gradio.app/pkg-version "HTTP/1.1 200 OK"
|
68 |
+
2024-05-30 16:12:41 INFO HTTP Request: GET https://api.gradio.app/v2/tunnel-request "HTTP/1.1 200 OK"
|
69 |
+
2024-05-30 16:14:12 INFO Starting search...
|
70 |
+
2024-05-30 16:14:12 INFO -------------------------------------------------------
|
71 |
+
2024-05-30 16:15:31 INFO Starting search...
|
72 |
+
2024-05-30 16:15:31 INFO -------------------------------------------------------
|
73 |
+
2024-05-30 16:15:31 INFO -------------------------------------------------------
|
74 |
+
2024-05-30 16:15:31 INFO Search terminated.
|
notebook_1.ipynb
ADDED
The diff for this file is too large to render.
See raw diff
|
|
notebook_2.ipynb
ADDED
@@ -0,0 +1,1148 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"cells": [
|
3 |
+
{
|
4 |
+
"cell_type": "code",
|
5 |
+
"execution_count": 1,
|
6 |
+
"metadata": {},
|
7 |
+
"outputs": [
|
8 |
+
{
|
9 |
+
"name": "stdout",
|
10 |
+
"output_type": "stream",
|
11 |
+
"text": [
|
12 |
+
"Data set already exists in the local drive. Loading it.\n"
|
13 |
+
]
|
14 |
+
}
|
15 |
+
],
|
16 |
+
"source": [
|
17 |
+
"import os\n",
|
18 |
+
"from pathlib import Path\n",
|
19 |
+
"import pickle\n",
|
20 |
+
"from datasets import load_dataset\n",
|
21 |
+
"\n",
|
22 |
+
"curr_dir = Path(os.getcwd())\n",
|
23 |
+
"data_dir = curr_dir / 'data'\n",
|
24 |
+
"if not os.path.exists(data_dir):\n",
|
25 |
+
" os.mkdir(data_dir)\n",
|
26 |
+
"data_pickle_path = data_dir / 'data_set.pkl'\n",
|
27 |
+
"\n",
|
28 |
+
"if not os.path.exists(data_pickle_path):\n",
|
29 |
+
" print(f\"Data set hasn't been loaded. Loading from the datasets library and save it as a pickle.\")\n",
|
30 |
+
" data_set = load_dataset(\"vipulmaheshwari/GTA-Image-Captioning-Dataset\")\n",
|
31 |
+
" with open(data_pickle_path, 'wb') as outfile:\n",
|
32 |
+
" pickle.dump(data_set, outfile)\n",
|
33 |
+
"else:\n",
|
34 |
+
" print(f\"Data set already exists in the local drive. Loading it.\")\n",
|
35 |
+
" with open(data_pickle_path, 'rb') as infile:\n",
|
36 |
+
" data_set = pickle.load(infile)"
|
37 |
+
]
|
38 |
+
},
|
39 |
+
{
|
40 |
+
"cell_type": "code",
|
41 |
+
"execution_count": 17,
|
42 |
+
"metadata": {},
|
43 |
+
"outputs": [],
|
44 |
+
"source": [
|
45 |
+
"# print(data_set)\n",
|
46 |
+
"# len(data_set['train']['image']), len(data_set['train']['text'])"
|
47 |
+
]
|
48 |
+
},
|
49 |
+
{
|
50 |
+
"cell_type": "code",
|
51 |
+
"execution_count": 44,
|
52 |
+
"metadata": {},
|
53 |
+
"outputs": [],
|
54 |
+
"source": [
|
55 |
+
"# Source: https://huggingface.co/sentence-transformers/clip-ViT-L-14\n",
|
56 |
+
"\n",
|
57 |
+
"from sentence_transformers import SentenceTransformer, util\n",
|
58 |
+
"# from PIL import Image\n",
|
59 |
+
"\n",
|
60 |
+
"#Load CLIP model\n",
|
61 |
+
"model = SentenceTransformer(\"sentence-transformers/clip-ViT-L-14\") # SentenceTransformer('clip-ViT-L-14')\n",
|
62 |
+
"\n",
|
63 |
+
"#Encode an image:\n",
|
64 |
+
"# img_emb = model.encode(image) # Image.open('two_dogs_in_snow.jpg')\n",
|
65 |
+
"\n",
|
66 |
+
"# #Encode text descriptions\n",
|
67 |
+
"# text_emb = model.encode(text) # ['Two dogs in the snow', 'A cat on a table', 'A picture of London at night']\n",
|
68 |
+
"\n",
|
69 |
+
"# #Compute cosine similarities \n",
|
70 |
+
"# cos_scores = util.cos_sim(img_emb, text_emb)\n",
|
71 |
+
"# print(cos_scores)"
|
72 |
+
]
|
73 |
+
},
|
74 |
+
{
|
75 |
+
"cell_type": "code",
|
76 |
+
"execution_count": null,
|
77 |
+
"metadata": {},
|
78 |
+
"outputs": [],
|
79 |
+
"source": [
|
80 |
+
"img_embeddings = []\n",
|
81 |
+
"for image in tqdm(data_set['train']['image'][:2]):\n",
|
82 |
+
" img_embedding = model.encode(image)\n",
|
83 |
+
" img_embeddings.append(img_embedding)"
|
84 |
+
]
|
85 |
+
},
|
86 |
+
{
|
87 |
+
"cell_type": "code",
|
88 |
+
"execution_count": null,
|
89 |
+
"metadata": {},
|
90 |
+
"outputs": [],
|
91 |
+
"source": []
|
92 |
+
},
|
93 |
+
{
|
94 |
+
"cell_type": "code",
|
95 |
+
"execution_count": null,
|
96 |
+
"metadata": {},
|
97 |
+
"outputs": [],
|
98 |
+
"source": []
|
99 |
+
},
|
100 |
+
{
|
101 |
+
"cell_type": "code",
|
102 |
+
"execution_count": null,
|
103 |
+
"metadata": {},
|
104 |
+
"outputs": [],
|
105 |
+
"source": []
|
106 |
+
},
|
107 |
+
{
|
108 |
+
"cell_type": "markdown",
|
109 |
+
"metadata": {},
|
110 |
+
"source": [
|
111 |
+
"# try FAISS, Chroma, Pinecone (check the GAFS project)"
|
112 |
+
]
|
113 |
+
},
|
114 |
+
{
|
115 |
+
"cell_type": "code",
|
116 |
+
"execution_count": null,
|
117 |
+
"metadata": {},
|
118 |
+
"outputs": [],
|
119 |
+
"source": []
|
120 |
+
},
|
121 |
+
{
|
122 |
+
"cell_type": "code",
|
123 |
+
"execution_count": null,
|
124 |
+
"metadata": {},
|
125 |
+
"outputs": [],
|
126 |
+
"source": []
|
127 |
+
},
|
128 |
+
{
|
129 |
+
"cell_type": "code",
|
130 |
+
"execution_count": null,
|
131 |
+
"metadata": {},
|
132 |
+
"outputs": [],
|
133 |
+
"source": []
|
134 |
+
},
|
135 |
+
{
|
136 |
+
"cell_type": "code",
|
137 |
+
"execution_count": null,
|
138 |
+
"metadata": {},
|
139 |
+
"outputs": [],
|
140 |
+
"source": []
|
141 |
+
},
|
142 |
+
{
|
143 |
+
"cell_type": "code",
|
144 |
+
"execution_count": null,
|
145 |
+
"metadata": {},
|
146 |
+
"outputs": [],
|
147 |
+
"source": []
|
148 |
+
},
|
149 |
+
{
|
150 |
+
"cell_type": "code",
|
151 |
+
"execution_count": null,
|
152 |
+
"metadata": {},
|
153 |
+
"outputs": [],
|
154 |
+
"source": []
|
155 |
+
},
|
156 |
+
{
|
157 |
+
"cell_type": "code",
|
158 |
+
"execution_count": null,
|
159 |
+
"metadata": {},
|
160 |
+
"outputs": [],
|
161 |
+
"source": [
|
162 |
+
"import pyarrow as pa\n",
|
163 |
+
"import lancedb\n",
|
164 |
+
"\n",
|
165 |
+
"db = lancedb.connect('./data/tables')\n",
|
166 |
+
"schema = pa.schema(\n",
|
167 |
+
" [\n",
|
168 |
+
" pa.field(\"vector\", pa.list_(pa.float32())),\n",
|
169 |
+
" # pa.field(\"text\", pa.string()),\n",
|
170 |
+
" # pa.field(\"id\", pa.int32())\n",
|
171 |
+
" ])\n",
|
172 |
+
"# tbl = db.create_table(\"gta_data\", schema=schema, mode=\"overwrite\")"
|
173 |
+
]
|
174 |
+
},
|
175 |
+
{
|
176 |
+
"cell_type": "code",
|
177 |
+
"execution_count": 60,
|
178 |
+
"metadata": {},
|
179 |
+
"outputs": [
|
180 |
+
{
|
181 |
+
"name": "stderr",
|
182 |
+
"output_type": "stream",
|
183 |
+
"text": [
|
184 |
+
"100%|██████████| 2/2 [00:15<00:00, 7.65s/it]\n"
|
185 |
+
]
|
186 |
+
}
|
187 |
+
],
|
188 |
+
"source": [
|
189 |
+
"from tqdm import tqdm\n",
|
190 |
+
"import numpy as np\n",
|
191 |
+
"\n",
|
192 |
+
"img_embeddings = []\n",
|
193 |
+
"for image in tqdm(data_set['train']['image'][:2]):\n",
|
194 |
+
" img_embedding = model.encode(image)\n",
|
195 |
+
" img_embeddings.append(img_embedding)\n",
|
196 |
+
"\n",
|
197 |
+
"tbl_data = pa.Table.from_arrays([pa.array(img_embeddings)], [\"vector\"])\n",
|
198 |
+
"tbl = db.create_table(\"gta_data\", tbl_data, schema=schema, mode=\"overwrite\")\n",
|
199 |
+
"\n",
|
200 |
+
"# tbl.add(img_embeddings)\n",
|
201 |
+
"# tbl.to_pandas()"
|
202 |
+
]
|
203 |
+
},
|
204 |
+
{
|
205 |
+
"cell_type": "code",
|
206 |
+
"execution_count": 63,
|
207 |
+
"metadata": {},
|
208 |
+
"outputs": [
|
209 |
+
{
|
210 |
+
"ename": "TypeError",
|
211 |
+
"evalue": "Query column vector must be a vector. Got list<item: float>.",
|
212 |
+
"output_type": "error",
|
213 |
+
"traceback": [
|
214 |
+
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
|
215 |
+
"\u001b[1;31mTypeError\u001b[0m Traceback (most recent call last)",
|
216 |
+
"Cell \u001b[1;32mIn[63], line 1\u001b[0m\n\u001b[1;32m----> 1\u001b[0m res \u001b[38;5;241m=\u001b[39m \u001b[43mtbl\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msearch\u001b[49m\u001b[43m(\u001b[49m\u001b[43mmodel\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mencode\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43ma road with a stop\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mvector_column_name\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mvector\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mlimit\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m3\u001b[39;49m\u001b[43m)\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mto_pandas\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 2\u001b[0m res\n",
|
217 |
+
"File \u001b[1;32mc:\\Users\\Admin\\AppData\\Local\\pypoetry\\Cache\\virtualenvs\\grandtheftauto-multimodal-rag-application-ufxwo2j--py3.11\\Lib\\site-packages\\lancedb\\query.py:262\u001b[0m, in \u001b[0;36mLanceQueryBuilder.to_pandas\u001b[1;34m(self, flatten)\u001b[0m\n\u001b[0;32m 247\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mto_pandas\u001b[39m(\u001b[38;5;28mself\u001b[39m, flatten: Optional[Union[\u001b[38;5;28mint\u001b[39m, \u001b[38;5;28mbool\u001b[39m]] \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mpd.DataFrame\u001b[39m\u001b[38;5;124m\"\u001b[39m:\n\u001b[0;32m 248\u001b[0m \u001b[38;5;250m \u001b[39m\u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[0;32m 249\u001b[0m \u001b[38;5;124;03m Execute the query and return the results as a pandas DataFrame.\u001b[39;00m\n\u001b[0;32m 250\u001b[0m \u001b[38;5;124;03m In addition to the selected columns, LanceDB also returns a vector\u001b[39;00m\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 260\u001b[0m \u001b[38;5;124;03m If unspecified, do not flatten the nested columns.\u001b[39;00m\n\u001b[0;32m 261\u001b[0m \u001b[38;5;124;03m \"\"\"\u001b[39;00m\n\u001b[1;32m--> 262\u001b[0m tbl \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mto_arrow\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 263\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m flatten \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mTrue\u001b[39;00m:\n\u001b[0;32m 264\u001b[0m \u001b[38;5;28;01mwhile\u001b[39;00m \u001b[38;5;28;01mTrue\u001b[39;00m:\n",
|
218 |
+
"File \u001b[1;32mc:\\Users\\Admin\\AppData\\Local\\pypoetry\\Cache\\virtualenvs\\grandtheftauto-multimodal-rag-application-ufxwo2j--py3.11\\Lib\\site-packages\\lancedb\\query.py:527\u001b[0m, in \u001b[0;36mLanceVectorQueryBuilder.to_arrow\u001b[1;34m(self)\u001b[0m\n\u001b[0;32m 518\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mto_arrow\u001b[39m(\u001b[38;5;28mself\u001b[39m) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m pa\u001b[38;5;241m.\u001b[39mTable:\n\u001b[0;32m 519\u001b[0m \u001b[38;5;250m \u001b[39m\u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[0;32m 520\u001b[0m \u001b[38;5;124;03m Execute the query and return the results as an\u001b[39;00m\n\u001b[0;32m 521\u001b[0m \u001b[38;5;124;03m [Apache Arrow Table](https://arrow.apache.org/docs/python/generated/pyarrow.Table.html#pyarrow.Table).\u001b[39;00m\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 525\u001b[0m \u001b[38;5;124;03m vector and the returned vectors.\u001b[39;00m\n\u001b[0;32m 526\u001b[0m \u001b[38;5;124;03m \"\"\"\u001b[39;00m\n\u001b[1;32m--> 527\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mto_batches\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241m.\u001b[39mread_all()\n",
|
219 |
+
"File \u001b[1;32mc:\\Users\\Admin\\AppData\\Local\\pypoetry\\Cache\\virtualenvs\\grandtheftauto-multimodal-rag-application-ufxwo2j--py3.11\\Lib\\site-packages\\lancedb\\query.py:557\u001b[0m, in \u001b[0;36mLanceVectorQueryBuilder.to_batches\u001b[1;34m(self, batch_size)\u001b[0m\n\u001b[0;32m 544\u001b[0m vector \u001b[38;5;241m=\u001b[39m [v\u001b[38;5;241m.\u001b[39mtolist() \u001b[38;5;28;01mfor\u001b[39;00m v \u001b[38;5;129;01min\u001b[39;00m vector]\n\u001b[0;32m 545\u001b[0m query \u001b[38;5;241m=\u001b[39m Query(\n\u001b[0;32m 546\u001b[0m vector\u001b[38;5;241m=\u001b[39mvector,\n\u001b[0;32m 547\u001b[0m \u001b[38;5;28mfilter\u001b[39m\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_where,\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 555\u001b[0m with_row_id\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_with_row_id,\n\u001b[0;32m 556\u001b[0m )\n\u001b[1;32m--> 557\u001b[0m result_set \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_table\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_execute_query\u001b[49m\u001b[43m(\u001b[49m\u001b[43mquery\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mbatch_size\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 558\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_reranker \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[0;32m 559\u001b[0m rs_table \u001b[38;5;241m=\u001b[39m result_set\u001b[38;5;241m.\u001b[39mread_all()\n",
|
220 |
+
"File \u001b[1;32mc:\\Users\\Admin\\AppData\\Local\\pypoetry\\Cache\\virtualenvs\\grandtheftauto-multimodal-rag-application-ufxwo2j--py3.11\\Lib\\site-packages\\lancedb\\table.py:1616\u001b[0m, in \u001b[0;36mLanceTable._execute_query\u001b[1;34m(self, query, batch_size)\u001b[0m\n\u001b[0;32m 1612\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m_execute_query\u001b[39m(\n\u001b[0;32m 1613\u001b[0m \u001b[38;5;28mself\u001b[39m, query: Query, batch_size: Optional[\u001b[38;5;28mint\u001b[39m] \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[0;32m 1614\u001b[0m ) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m pa\u001b[38;5;241m.\u001b[39mRecordBatchReader:\n\u001b[0;32m 1615\u001b[0m ds \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mto_lance()\n\u001b[1;32m-> 1616\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mds\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mscanner\u001b[49m\u001b[43m(\u001b[49m\n\u001b[0;32m 1617\u001b[0m \u001b[43m \u001b[49m\u001b[43mcolumns\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mquery\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcolumns\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 1618\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;28;43mfilter\u001b[39;49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mquery\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfilter\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 1619\u001b[0m \u001b[43m \u001b[49m\u001b[43mprefilter\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mquery\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mprefilter\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 1620\u001b[0m \u001b[43m \u001b[49m\u001b[43mnearest\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m{\u001b[49m\n\u001b[0;32m 1621\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mcolumn\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m 
\u001b[49m\u001b[43mquery\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mvector_column\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 1622\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mq\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43mquery\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mvector\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 1623\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mk\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43mquery\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mk\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 1624\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mmetric\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43mquery\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mmetric\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 1625\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mnprobes\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43mquery\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mnprobes\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 1626\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mrefine_factor\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43mquery\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mrefine_factor\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 1627\u001b[0m \u001b[43m \u001b[49m\u001b[43m}\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 1628\u001b[0m \u001b[43m \u001b[49m\u001b[43mwith_row_id\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mquery\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mwith_row_id\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 1629\u001b[0m \u001b[43m 
\u001b[49m\u001b[43mbatch_size\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mbatch_size\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 1630\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241m.\u001b[39mto_reader()\n",
|
221 |
+
"File \u001b[1;32mc:\\Users\\Admin\\AppData\\Local\\pypoetry\\Cache\\virtualenvs\\grandtheftauto-multimodal-rag-application-ufxwo2j--py3.11\\Lib\\site-packages\\lance\\dataset.py:321\u001b[0m, in \u001b[0;36mLanceDataset.scanner\u001b[1;34m(self, columns, filter, limit, offset, nearest, batch_size, batch_readahead, fragment_readahead, scan_in_order, fragments, prefilter, with_row_id, use_stats)\u001b[0m\n\u001b[0;32m 305\u001b[0m builder \u001b[38;5;241m=\u001b[39m (\n\u001b[0;32m 306\u001b[0m ScannerBuilder(\u001b[38;5;28mself\u001b[39m)\n\u001b[0;32m 307\u001b[0m \u001b[38;5;241m.\u001b[39mcolumns(columns)\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 318\u001b[0m \u001b[38;5;241m.\u001b[39muse_stats(use_stats)\n\u001b[0;32m 319\u001b[0m )\n\u001b[0;32m 320\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m nearest \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m--> 321\u001b[0m builder \u001b[38;5;241m=\u001b[39m \u001b[43mbuilder\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mnearest\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mnearest\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 322\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m builder\u001b[38;5;241m.\u001b[39mto_scanner()\n",
|
222 |
+
"File \u001b[1;32mc:\\Users\\Admin\\AppData\\Local\\pypoetry\\Cache\\virtualenvs\\grandtheftauto-multimodal-rag-application-ufxwo2j--py3.11\\Lib\\site-packages\\lance\\dataset.py:2049\u001b[0m, in \u001b[0;36mScannerBuilder.nearest\u001b[1;34m(self, column, q, k, metric, nprobes, refine_factor, use_index)\u001b[0m\n\u001b[0;32m 2047\u001b[0m column_type \u001b[38;5;241m=\u001b[39m column_type\u001b[38;5;241m.\u001b[39mstorage_type\n\u001b[0;32m 2048\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m pa\u001b[38;5;241m.\u001b[39mtypes\u001b[38;5;241m.\u001b[39mis_fixed_size_list(column_type):\n\u001b[1;32m-> 2049\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mTypeError\u001b[39;00m(\n\u001b[0;32m 2050\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mQuery column \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mcolumn\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m must be a vector. Got \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mcolumn_field\u001b[38;5;241m.\u001b[39mtype\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m.\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m 2051\u001b[0m )\n\u001b[0;32m 2052\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mlen\u001b[39m(q) \u001b[38;5;241m!=\u001b[39m column_type\u001b[38;5;241m.\u001b[39mlist_size:\n\u001b[0;32m 2053\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\n\u001b[0;32m 2054\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mQuery vector size \u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;28mlen\u001b[39m(q)\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m does not match index column size\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m 2055\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m 
\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mcolumn_type\u001b[38;5;241m.\u001b[39mlist_size\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m 2056\u001b[0m )\n",
|
223 |
+
"\u001b[1;31mTypeError\u001b[0m: Query column vector must be a vector. Got list<item: float>."
|
224 |
+
]
|
225 |
+
}
|
226 |
+
],
|
227 |
+
"source": [
|
228 |
+
"res = tbl.search(model.encode(\"a road with a stop\"), vector_column_name=\"vector\").limit(3).to_pandas()\n",
|
229 |
+
"res"
|
230 |
+
]
|
231 |
+
},
|
232 |
+
{
|
233 |
+
"cell_type": "code",
|
234 |
+
"execution_count": null,
|
235 |
+
"metadata": {},
|
236 |
+
"outputs": [],
|
237 |
+
"source": []
|
238 |
+
},
|
239 |
+
{
|
240 |
+
"cell_type": "code",
|
241 |
+
"execution_count": null,
|
242 |
+
"metadata": {},
|
243 |
+
"outputs": [],
|
244 |
+
"source": []
|
245 |
+
},
|
246 |
+
{
|
247 |
+
"cell_type": "code",
|
248 |
+
"execution_count": null,
|
249 |
+
"metadata": {},
|
250 |
+
"outputs": [],
|
251 |
+
"source": [
|
252 |
+
"# https://huggingface.co/openai/clip-vit-large-patch14"
|
253 |
+
]
|
254 |
+
},
|
255 |
+
{
|
256 |
+
"cell_type": "code",
|
257 |
+
"execution_count": 24,
|
258 |
+
"metadata": {},
|
259 |
+
"outputs": [],
|
260 |
+
"source": [
|
261 |
+
"import clip\n",
|
262 |
+
"import torch\n",
|
263 |
+
"import os\n",
|
264 |
+
"from datasets import load_dataset\n",
|
265 |
+
"\n",
|
266 |
+
"# ds = load_dataset(\"vipulmaheshwari/GTA-Image-Captioning-Dataset\")\n",
|
267 |
+
"# device = torch.device(\"mps\")\n",
|
268 |
+
"model, preprocess = clip.load(\"ViT-L/14\") # , device=device"
|
269 |
+
]
|
270 |
+
},
|
271 |
+
{
|
272 |
+
"cell_type": "code",
|
273 |
+
"execution_count": 15,
|
274 |
+
"metadata": {},
|
275 |
+
"outputs": [
|
276 |
+
{
|
277 |
+
"data": {
|
278 |
+
"text/plain": [
|
279 |
+
"768"
|
280 |
+
]
|
281 |
+
},
|
282 |
+
"execution_count": 15,
|
283 |
+
"metadata": {},
|
284 |
+
"output_type": "execute_result"
|
285 |
+
}
|
286 |
+
],
|
287 |
+
"source": [
|
288 |
+
"def embed_txt(txt):\n",
|
289 |
+
" tokenized_text = clip.tokenize([txt])\n",
|
290 |
+
" embeddings = model.encode_text(tokenized_text)\n",
|
291 |
+
" \n",
|
292 |
+
" # Detach from the autograd graph, convert to a NumPy array, and extract the first element as a list\n",
|
293 |
+
" result = embeddings.detach().numpy()[0].tolist()\n",
|
294 |
+
" return result\n",
|
295 |
+
"\n",
|
296 |
+
"len(embed_txt(\"a road with a stop\"))"
|
297 |
+
]
|
298 |
+
},
|
299 |
+
{
|
300 |
+
"cell_type": "code",
|
301 |
+
"execution_count": 11,
|
302 |
+
"metadata": {},
|
303 |
+
"outputs": [
|
304 |
+
{
|
305 |
+
"data": {
|
306 |
+
"text/plain": [
|
307 |
+
"[1.172108769416809,\n",
|
308 |
+
" 0.5741956830024719,\n",
|
309 |
+
" -0.11420677602291107,\n",
|
310 |
+
" -0.5107784271240234,\n",
|
311 |
+
" -0.7742195725440979,\n",
|
312 |
+
" 0.7895426750183105,\n",
|
313 |
+
" 0.31811264157295227,\n",
|
314 |
+
" 0.5389135479927063,\n",
|
315 |
+
" 0.17074763774871826,\n",
|
316 |
+
" -1.0352754592895508,\n",
|
317 |
+
" -0.013449656777083874,\n",
|
318 |
+
" -0.5795634388923645,\n",
|
319 |
+
" -0.37020763754844666,\n",
|
320 |
+
" -0.7534741163253784,\n",
|
321 |
+
" 0.6788989901542664,\n",
|
322 |
+
" -0.1245330423116684,\n",
|
323 |
+
" 1.0375893115997314,\n",
|
324 |
+
" -0.08196641504764557,\n",
|
325 |
+
" 0.169560506939888,\n",
|
326 |
+
" -0.3306411802768707,\n",
|
327 |
+
" 0.6850194931030273,\n",
|
328 |
+
" -0.4113234281539917,\n",
|
329 |
+
" -0.3725243806838989,\n",
|
330 |
+
" -0.8902166485786438,\n",
|
331 |
+
" -0.2419223040342331,\n",
|
332 |
+
" 0.33643779158592224,\n",
|
333 |
+
" 0.18724264204502106,\n",
|
334 |
+
" 0.6745221018791199,\n",
|
335 |
+
" 0.00899740681052208,\n",
|
336 |
+
" -0.29769381880760193,\n",
|
337 |
+
" 0.6830898523330688,\n",
|
338 |
+
" 0.7002785205841064,\n",
|
339 |
+
" 0.5598942041397095,\n",
|
340 |
+
" -0.27884775400161743,\n",
|
341 |
+
" 0.29804039001464844,\n",
|
342 |
+
" 0.4663200378417969,\n",
|
343 |
+
" -0.40516427159309387,\n",
|
344 |
+
" -0.2796509861946106,\n",
|
345 |
+
" -0.3568377196788788,\n",
|
346 |
+
" 0.7982958555221558,\n",
|
347 |
+
" 1.0218019485473633,\n",
|
348 |
+
" -0.3191905915737152,\n",
|
349 |
+
" -0.8690600395202637,\n",
|
350 |
+
" -0.5986450910568237,\n",
|
351 |
+
" 0.6520456671714783,\n",
|
352 |
+
" 0.8482719659805298,\n",
|
353 |
+
" 0.45436325669288635,\n",
|
354 |
+
" -0.24868743121623993,\n",
|
355 |
+
" -0.22428922355175018,\n",
|
356 |
+
" -0.3995105028152466,\n",
|
357 |
+
" 0.1387435346841812,\n",
|
358 |
+
" 0.030430370941758156,\n",
|
359 |
+
" 0.1954972743988037,\n",
|
360 |
+
" 0.36345618963241577,\n",
|
361 |
+
" 0.23408269882202148,\n",
|
362 |
+
" 0.030055442824959755,\n",
|
363 |
+
" -0.13948054611682892,\n",
|
364 |
+
" -0.6816356778144836,\n",
|
365 |
+
" -0.2554306387901306,\n",
|
366 |
+
" -0.8186500668525696,\n",
|
367 |
+
" 0.0802079439163208,\n",
|
368 |
+
" -0.28623825311660767,\n",
|
369 |
+
" 0.889072060585022,\n",
|
370 |
+
" 0.3205733895301819,\n",
|
371 |
+
" 1.4578713178634644,\n",
|
372 |
+
" 0.5289382934570312,\n",
|
373 |
+
" -0.9107804894447327,\n",
|
374 |
+
" -0.1899547427892685,\n",
|
375 |
+
" -0.39814451336860657,\n",
|
376 |
+
" 0.07741428166627884,\n",
|
377 |
+
" 0.00696764700114727,\n",
|
378 |
+
" 0.8374080657958984,\n",
|
379 |
+
" 0.17547933757305145,\n",
|
380 |
+
" -0.6835469007492065,\n",
|
381 |
+
" 0.44190704822540283,\n",
|
382 |
+
" -0.258558452129364,\n",
|
383 |
+
" -0.16306370496749878,\n",
|
384 |
+
" 0.17053553462028503,\n",
|
385 |
+
" 0.8770076036453247,\n",
|
386 |
+
" 0.2896091341972351,\n",
|
387 |
+
" -0.2233574390411377,\n",
|
388 |
+
" -0.30297425389289856,\n",
|
389 |
+
" -0.7410178780555725,\n",
|
390 |
+
" 0.010058385320007801,\n",
|
391 |
+
" -0.7731197476387024,\n",
|
392 |
+
" -0.2569619417190552,\n",
|
393 |
+
" 0.05559535324573517,\n",
|
394 |
+
" 0.6135262846946716,\n",
|
395 |
+
" -0.5267459154129028,\n",
|
396 |
+
" -0.14416567981243134,\n",
|
397 |
+
" 0.3300650715827942,\n",
|
398 |
+
" 0.3322101831436157,\n",
|
399 |
+
" 0.260479211807251,\n",
|
400 |
+
" -0.6002621054649353,\n",
|
401 |
+
" 0.033296529203653336,\n",
|
402 |
+
" 0.5030784010887146,\n",
|
403 |
+
" -0.5291236042976379,\n",
|
404 |
+
" 0.11839054524898529,\n",
|
405 |
+
" -0.2279912680387497,\n",
|
406 |
+
" -0.24884033203125,\n",
|
407 |
+
" -0.27888786792755127,\n",
|
408 |
+
" -0.1304142028093338,\n",
|
409 |
+
" 0.1286783516407013,\n",
|
410 |
+
" 0.15377336740493774,\n",
|
411 |
+
" 0.5802848935127258,\n",
|
412 |
+
" -0.3416184186935425,\n",
|
413 |
+
" -0.41235557198524475,\n",
|
414 |
+
" 0.04911366105079651,\n",
|
415 |
+
" 0.28588297963142395,\n",
|
416 |
+
" 1.097459316253662,\n",
|
417 |
+
" 0.8836804628372192,\n",
|
418 |
+
" -0.06680312007665634,\n",
|
419 |
+
" 0.5119672417640686,\n",
|
420 |
+
" 0.1433386206626892,\n",
|
421 |
+
" 0.3975537121295929,\n",
|
422 |
+
" 0.751021683216095,\n",
|
423 |
+
" -0.5127158761024475,\n",
|
424 |
+
" -1.0673898458480835,\n",
|
425 |
+
" -0.810725212097168,\n",
|
426 |
+
" -0.9325631260871887,\n",
|
427 |
+
" 0.28165996074676514,\n",
|
428 |
+
" -1.1700552701950073,\n",
|
429 |
+
" -0.6979520916938782,\n",
|
430 |
+
" 0.09645866602659225,\n",
|
431 |
+
" -0.15432433784008026,\n",
|
432 |
+
" -0.6545705199241638,\n",
|
433 |
+
" -0.2297753095626831,\n",
|
434 |
+
" 0.9147917628288269,\n",
|
435 |
+
" -0.3901214897632599,\n",
|
436 |
+
" -0.08340626955032349,\n",
|
437 |
+
" -0.0342048779129982,\n",
|
438 |
+
" 0.4271363615989685,\n",
|
439 |
+
" 0.3410806655883789,\n",
|
440 |
+
" -0.14932666718959808,\n",
|
441 |
+
" 0.05415431410074234,\n",
|
442 |
+
" -0.5995809435844421,\n",
|
443 |
+
" -0.33829835057258606,\n",
|
444 |
+
" -0.23623280227184296,\n",
|
445 |
+
" -0.5740441679954529,\n",
|
446 |
+
" 0.3325800895690918,\n",
|
447 |
+
" -0.18519632518291473,\n",
|
448 |
+
" -0.26904159784317017,\n",
|
449 |
+
" 0.03128799423575401,\n",
|
450 |
+
" 0.15838740766048431,\n",
|
451 |
+
" -0.003409828059375286,\n",
|
452 |
+
" -0.2664038836956024,\n",
|
453 |
+
" -0.6785658597946167,\n",
|
454 |
+
" 0.4431314170360565,\n",
|
455 |
+
" -0.38189026713371277,\n",
|
456 |
+
" 0.5427551865577698,\n",
|
457 |
+
" 0.5074883103370667,\n",
|
458 |
+
" -0.186558797955513,\n",
|
459 |
+
" 0.08342668414115906,\n",
|
460 |
+
" 0.04791847988963127,\n",
|
461 |
+
" -0.1341174989938736,\n",
|
462 |
+
" 0.8764032125473022,\n",
|
463 |
+
" -0.10158982127904892,\n",
|
464 |
+
" 0.9622796177864075,\n",
|
465 |
+
" -0.058163080364465714,\n",
|
466 |
+
" -1.0029855966567993,\n",
|
467 |
+
" -0.22422465682029724,\n",
|
468 |
+
" 1.2381765842437744,\n",
|
469 |
+
" 0.17981192469596863,\n",
|
470 |
+
" 0.034056372940540314,\n",
|
471 |
+
" -0.2695963978767395,\n",
|
472 |
+
" -0.21056877076625824,\n",
|
473 |
+
" -0.3712306320667267,\n",
|
474 |
+
" 0.17336499691009521,\n",
|
475 |
+
" 0.5278773903846741,\n",
|
476 |
+
" 0.7908108234405518,\n",
|
477 |
+
" -1.034334659576416,\n",
|
478 |
+
" -0.5650461912155151,\n",
|
479 |
+
" -0.7466263175010681,\n",
|
480 |
+
" -0.16805803775787354,\n",
|
481 |
+
" 0.39045724272727966,\n",
|
482 |
+
" -0.5074604749679565,\n",
|
483 |
+
" 0.29658886790275574,\n",
|
484 |
+
" -0.1186276450753212,\n",
|
485 |
+
" 0.7888982892036438,\n",
|
486 |
+
" -0.00017159162962343544,\n",
|
487 |
+
" 0.9989897608757019,\n",
|
488 |
+
" 0.21528062224388123,\n",
|
489 |
+
" 0.3544112741947174,\n",
|
490 |
+
" -0.18352235853672028,\n",
|
491 |
+
" -0.5933219790458679,\n",
|
492 |
+
" -0.4221193492412567,\n",
|
493 |
+
" 0.20716431736946106,\n",
|
494 |
+
" 0.026883812621235847,\n",
|
495 |
+
" 1.2931787967681885,\n",
|
496 |
+
" 0.3020362854003906,\n",
|
497 |
+
" 0.26052647829055786,\n",
|
498 |
+
" 0.056001197546720505,\n",
|
499 |
+
" -0.5442985892295837,\n",
|
500 |
+
" -0.24692402780056,\n",
|
501 |
+
" -0.04342973232269287,\n",
|
502 |
+
" 0.32930392026901245,\n",
|
503 |
+
" -0.7617244124412537,\n",
|
504 |
+
" 0.26960083842277527,\n",
|
505 |
+
" 0.29244083166122437,\n",
|
506 |
+
" -0.2099844217300415,\n",
|
507 |
+
" 0.2785693407058716,\n",
|
508 |
+
" 0.07669660449028015,\n",
|
509 |
+
" -0.1421067714691162,\n",
|
510 |
+
" 0.46162599325180054,\n",
|
511 |
+
" 0.3855959475040436,\n",
|
512 |
+
" 0.27650055289268494,\n",
|
513 |
+
" -0.44994688034057617,\n",
|
514 |
+
" -0.28603509068489075,\n",
|
515 |
+
" -0.5041812062263489,\n",
|
516 |
+
" -0.3805933892726898,\n",
|
517 |
+
" 0.5895918011665344,\n",
|
518 |
+
" 0.6383715867996216,\n",
|
519 |
+
" -0.08397688716650009,\n",
|
520 |
+
" 0.22880668938159943,\n",
|
521 |
+
" -0.25133225321769714,\n",
|
522 |
+
" 0.2853071093559265,\n",
|
523 |
+
" -0.0931459441781044,\n",
|
524 |
+
" 0.3020959496498108,\n",
|
525 |
+
" 0.24055352807044983,\n",
|
526 |
+
" 0.18953140079975128,\n",
|
527 |
+
" -0.17559008300304413,\n",
|
528 |
+
" 0.11638100445270538,\n",
|
529 |
+
" 0.5736441612243652,\n",
|
530 |
+
" 0.34651291370391846,\n",
|
531 |
+
" 0.0011261674808338284,\n",
|
532 |
+
" 0.6858928203582764,\n",
|
533 |
+
" -0.3585776090621948,\n",
|
534 |
+
" 0.21113723516464233,\n",
|
535 |
+
" -0.451948344707489,\n",
|
536 |
+
" -0.6812528371810913,\n",
|
537 |
+
" -0.37171897292137146,\n",
|
538 |
+
" -0.11487153172492981,\n",
|
539 |
+
" -0.7819438576698303,\n",
|
540 |
+
" 0.2523130476474762,\n",
|
541 |
+
" -0.006692436058074236,\n",
|
542 |
+
" 0.5665392279624939,\n",
|
543 |
+
" -0.5619456768035889,\n",
|
544 |
+
" 0.06306441873311996,\n",
|
545 |
+
" 0.21295419335365295,\n",
|
546 |
+
" 0.5865535140037537,\n",
|
547 |
+
" 0.27423301339149475,\n",
|
548 |
+
" 0.2840102016925812,\n",
|
549 |
+
" -0.37136274576187134,\n",
|
550 |
+
" 0.016866570338606834,\n",
|
551 |
+
" 0.2263607531785965,\n",
|
552 |
+
" 0.43608683347702026,\n",
|
553 |
+
" -0.4567808508872986,\n",
|
554 |
+
" 0.9201197028160095,\n",
|
555 |
+
" -0.28868433833122253,\n",
|
556 |
+
" 0.2835354208946228,\n",
|
557 |
+
" 0.5691022276878357,\n",
|
558 |
+
" -0.24377702176570892,\n",
|
559 |
+
" 0.5043097138404846,\n",
|
560 |
+
" -0.41853949427604675,\n",
|
561 |
+
" 0.03636287525296211,\n",
|
562 |
+
" -0.07350795716047287,\n",
|
563 |
+
" -0.06902104616165161,\n",
|
564 |
+
" 0.32698169350624084,\n",
|
565 |
+
" -0.24132660031318665,\n",
|
566 |
+
" 0.0912783071398735,\n",
|
567 |
+
" -1.047544002532959,\n",
|
568 |
+
" -0.8717364072799683,\n",
|
569 |
+
" -0.8879557847976685,\n",
|
570 |
+
" 0.301925927400589,\n",
|
571 |
+
" -1.2747677564620972,\n",
|
572 |
+
" 0.10643213242292404,\n",
|
573 |
+
" 0.050040390342473984,\n",
|
574 |
+
" -0.6990651488304138,\n",
|
575 |
+
" 0.4598444104194641,\n",
|
576 |
+
" -0.2630557417869568,\n",
|
577 |
+
" 0.3260715901851654,\n",
|
578 |
+
" 0.15428033471107483,\n",
|
579 |
+
" 0.10122397541999817,\n",
|
580 |
+
" 0.07699556648731232,\n",
|
581 |
+
" 0.06605273485183716,\n",
|
582 |
+
" -0.2160506695508957,\n",
|
583 |
+
" -0.1665394902229309,\n",
|
584 |
+
" -0.5145867466926575,\n",
|
585 |
+
" -0.8410879373550415,\n",
|
586 |
+
" -0.3635564148426056,\n",
|
587 |
+
" -0.14213085174560547,\n",
|
588 |
+
" -0.3718281686306,\n",
|
589 |
+
" -0.2025422751903534,\n",
|
590 |
+
" -0.45895904302597046,\n",
|
591 |
+
" 0.16690057516098022,\n",
|
592 |
+
" -0.29905644059181213,\n",
|
593 |
+
" 0.03865504637360573,\n",
|
594 |
+
" 0.23067855834960938,\n",
|
595 |
+
" 0.23403894901275635,\n",
|
596 |
+
" -0.3748420774936676,\n",
|
597 |
+
" -0.4377340078353882,\n",
|
598 |
+
" -0.6237973570823669,\n",
|
599 |
+
" -0.5650405287742615,\n",
|
600 |
+
" -0.12215842306613922,\n",
|
601 |
+
" -0.23550915718078613,\n",
|
602 |
+
" -0.030611969530582428,\n",
|
603 |
+
" 0.1457085907459259,\n",
|
604 |
+
" 0.39134201407432556,\n",
|
605 |
+
" 0.7538257241249084,\n",
|
606 |
+
" -0.5013869404792786,\n",
|
607 |
+
" -0.22639918327331543,\n",
|
608 |
+
" 0.324470579624176,\n",
|
609 |
+
" 0.2524488568305969,\n",
|
610 |
+
" -0.6817197799682617,\n",
|
611 |
+
" -0.1683609038591385,\n",
|
612 |
+
" 0.09771472215652466,\n",
|
613 |
+
" -0.324865460395813,\n",
|
614 |
+
" 0.38337022066116333,\n",
|
615 |
+
" -0.148436039686203,\n",
|
616 |
+
" 0.7256155610084534,\n",
|
617 |
+
" -0.9280087947845459,\n",
|
618 |
+
" -0.6846877336502075,\n",
|
619 |
+
" -0.37772396206855774,\n",
|
620 |
+
" 0.03854738548398018,\n",
|
621 |
+
" -0.5223367214202881,\n",
|
622 |
+
" 0.04659451171755791,\n",
|
623 |
+
" -1.2525877952575684,\n",
|
624 |
+
" 0.15308304131031036,\n",
|
625 |
+
" -0.2739616334438324,\n",
|
626 |
+
" 0.07301849126815796,\n",
|
627 |
+
" 0.7795864939689636,\n",
|
628 |
+
" -0.2228480577468872,\n",
|
629 |
+
" -0.35411256551742554,\n",
|
630 |
+
" -0.6261951923370361,\n",
|
631 |
+
" 0.20154286921024323,\n",
|
632 |
+
" -0.02966398000717163,\n",
|
633 |
+
" -0.7075097560882568,\n",
|
634 |
+
" -0.45100030303001404,\n",
|
635 |
+
" -0.5318045020103455,\n",
|
636 |
+
" 0.22182771563529968,\n",
|
637 |
+
" 0.08000355958938599,\n",
|
638 |
+
" 0.16378679871559143,\n",
|
639 |
+
" 0.33453676104545593,\n",
|
640 |
+
" -0.20498014986515045,\n",
|
641 |
+
" -0.5192173719406128,\n",
|
642 |
+
" 0.3957352936267853,\n",
|
643 |
+
" -0.21540209650993347,\n",
|
644 |
+
" -0.26865679025650024,\n",
|
645 |
+
" -0.9579092264175415,\n",
|
646 |
+
" 0.29295825958251953,\n",
|
647 |
+
" 0.07182762026786804,\n",
|
648 |
+
" 0.2812371850013733,\n",
|
649 |
+
" 0.5159787535667419,\n",
|
650 |
+
" -0.1598782241344452,\n",
|
651 |
+
" -0.02911016158759594,\n",
|
652 |
+
" 0.10978005081415176,\n",
|
653 |
+
" -1.152063012123108,\n",
|
654 |
+
" -1.075944423675537,\n",
|
655 |
+
" -0.19859834015369415,\n",
|
656 |
+
" 0.48424282670021057,\n",
|
657 |
+
" -0.3020830452442169,\n",
|
658 |
+
" 0.0681198462843895,\n",
|
659 |
+
" -0.03712642937898636,\n",
|
660 |
+
" -0.26295045018196106,\n",
|
661 |
+
" 0.23075002431869507,\n",
|
662 |
+
" 0.03392830863595009,\n",
|
663 |
+
" 0.5592344999313354,\n",
|
664 |
+
" 0.27158620953559875,\n",
|
665 |
+
" 0.08701741695404053,\n",
|
666 |
+
" -0.2469501793384552,\n",
|
667 |
+
" 0.7389507293701172,\n",
|
668 |
+
" 0.3184473216533661,\n",
|
669 |
+
" -0.5283591151237488,\n",
|
670 |
+
" -0.35726648569107056,\n",
|
671 |
+
" 0.2647046446800232,\n",
|
672 |
+
" 0.06684468686580658,\n",
|
673 |
+
" -0.4558630883693695,\n",
|
674 |
+
" -0.3814390301704407,\n",
|
675 |
+
" 0.6464404463768005,\n",
|
676 |
+
" -0.3603093922138214,\n",
|
677 |
+
" -0.7406730651855469,\n",
|
678 |
+
" -0.06739675253629684,\n",
|
679 |
+
" 0.3286390006542206,\n",
|
680 |
+
" 0.07030770927667618,\n",
|
681 |
+
" 0.20259763300418854,\n",
|
682 |
+
" -0.18537510931491852,\n",
|
683 |
+
" 0.39111021161079407,\n",
|
684 |
+
" -0.1252942532300949,\n",
|
685 |
+
" 0.1268956959247589,\n",
|
686 |
+
" -0.10496045649051666,\n",
|
687 |
+
" 1.1690759658813477,\n",
|
688 |
+
" 0.23655962944030762,\n",
|
689 |
+
" 0.2556387782096863,\n",
|
690 |
+
" -0.30134761333465576,\n",
|
691 |
+
" -0.3626421391963959,\n",
|
692 |
+
" -0.35505855083465576,\n",
|
693 |
+
" -0.22458982467651367,\n",
|
694 |
+
" -0.40729954838752747,\n",
|
695 |
+
" -0.40974897146224976,\n",
|
696 |
+
" 0.028972748667001724,\n",
|
697 |
+
" 0.6284871101379395,\n",
|
698 |
+
" 0.3097871243953705,\n",
|
699 |
+
" -0.1652112752199173,\n",
|
700 |
+
" 1.0627437829971313,\n",
|
701 |
+
" -0.6887637376785278,\n",
|
702 |
+
" -0.031500522047281265,\n",
|
703 |
+
" -0.0873744785785675,\n",
|
704 |
+
" -0.9616701006889343,\n",
|
705 |
+
" 0.3587159216403961,\n",
|
706 |
+
" 0.1391131579875946,\n",
|
707 |
+
" -0.19815994799137115,\n",
|
708 |
+
" 0.7807681560516357,\n",
|
709 |
+
" 0.2649019658565521,\n",
|
710 |
+
" -0.48934823274612427,\n",
|
711 |
+
" -0.7037213444709778,\n",
|
712 |
+
" -0.39783185720443726,\n",
|
713 |
+
" -0.36193808913230896,\n",
|
714 |
+
" -0.6811600923538208,\n",
|
715 |
+
" -0.18488575518131256,\n",
|
716 |
+
" 0.6047443151473999,\n",
|
717 |
+
" -0.17012985050678253,\n",
|
718 |
+
" -0.11221067607402802,\n",
|
719 |
+
" -0.11349140107631683,\n",
|
720 |
+
" -7.79653263092041,\n",
|
721 |
+
" -0.03174687176942825,\n",
|
722 |
+
" -0.5907049179077148,\n",
|
723 |
+
" -0.0845143049955368,\n",
|
724 |
+
" 0.6719594597816467,\n",
|
725 |
+
" -0.6047013998031616,\n",
|
726 |
+
" -0.4621417820453644,\n",
|
727 |
+
" 0.4189649224281311,\n",
|
728 |
+
" 0.2606521546840668,\n",
|
729 |
+
" -0.5251185894012451,\n",
|
730 |
+
" 0.656951904296875,\n",
|
731 |
+
" -0.14103704690933228,\n",
|
732 |
+
" -0.724404513835907,\n",
|
733 |
+
" 0.032266344875097275,\n",
|
734 |
+
" -0.38332653045654297,\n",
|
735 |
+
" 0.2214561551809311,\n",
|
736 |
+
" -0.11025898903608322,\n",
|
737 |
+
" 0.2219904512166977,\n",
|
738 |
+
" -0.16805943846702576,\n",
|
739 |
+
" -0.22911910712718964,\n",
|
740 |
+
" 0.40065279603004456,\n",
|
741 |
+
" 0.8264251947402954,\n",
|
742 |
+
" -0.25879043340682983,\n",
|
743 |
+
" -0.4252917170524597,\n",
|
744 |
+
" -0.1860014647245407,\n",
|
745 |
+
" 0.21712413430213928,\n",
|
746 |
+
" 0.852258026599884,\n",
|
747 |
+
" 1.1114447116851807,\n",
|
748 |
+
" 0.03458324819803238,\n",
|
749 |
+
" -0.42567503452301025,\n",
|
750 |
+
" -0.4035224914550781,\n",
|
751 |
+
" 0.5391470789909363,\n",
|
752 |
+
" 0.6653061509132385,\n",
|
753 |
+
" -0.15112830698490143,\n",
|
754 |
+
" 0.20673374831676483,\n",
|
755 |
+
" 0.5916152596473694,\n",
|
756 |
+
" 0.10783706605434418,\n",
|
757 |
+
" 0.06303859502077103,\n",
|
758 |
+
" -0.6804474592208862,\n",
|
759 |
+
" 0.46267828345298767,\n",
|
760 |
+
" -0.8944555521011353,\n",
|
761 |
+
" -0.20007365942001343,\n",
|
762 |
+
" -0.18524183332920074,\n",
|
763 |
+
" -0.25279444456100464,\n",
|
764 |
+
" 0.013942774385213852,\n",
|
765 |
+
" -0.227418452501297,\n",
|
766 |
+
" -0.5019238591194153,\n",
|
767 |
+
" -0.259070485830307,\n",
|
768 |
+
" -0.4195726811885834,\n",
|
769 |
+
" -0.2565968334674835,\n",
|
770 |
+
" 0.08592142164707184,\n",
|
771 |
+
" -0.4816386103630066,\n",
|
772 |
+
" -0.7389425039291382,\n",
|
773 |
+
" 0.384757936000824,\n",
|
774 |
+
" 1.148498773574829,\n",
|
775 |
+
" -0.08795226365327835,\n",
|
776 |
+
" -0.7781391143798828,\n",
|
777 |
+
" -0.18237966299057007,\n",
|
778 |
+
" 0.27100449800491333,\n",
|
779 |
+
" 0.7376315593719482,\n",
|
780 |
+
" -0.2066810131072998,\n",
|
781 |
+
" -0.042161568999290466,\n",
|
782 |
+
" 0.14717990159988403,\n",
|
783 |
+
" -0.25498059391975403,\n",
|
784 |
+
" 0.33164745569229126,\n",
|
785 |
+
" -0.3789907693862915,\n",
|
786 |
+
" -0.702992856502533,\n",
|
787 |
+
" -0.46402469277381897,\n",
|
788 |
+
" -0.47181829810142517,\n",
|
789 |
+
" -0.530529260635376,\n",
|
790 |
+
" 0.08136516064405441,\n",
|
791 |
+
" 0.3396340608596802,\n",
|
792 |
+
" -0.21239398419857025,\n",
|
793 |
+
" 0.38136026263237,\n",
|
794 |
+
" -0.9020550847053528,\n",
|
795 |
+
" -0.41401106119155884,\n",
|
796 |
+
" -0.47626185417175293,\n",
|
797 |
+
" -0.34683799743652344,\n",
|
798 |
+
" -0.3377147912979126,\n",
|
799 |
+
" -0.6628923416137695,\n",
|
800 |
+
" 0.2143520712852478,\n",
|
801 |
+
" 0.31117284297943115,\n",
|
802 |
+
" 0.43092554807662964,\n",
|
803 |
+
" 0.12191533297300339,\n",
|
804 |
+
" -0.017828848212957382,\n",
|
805 |
+
" -0.12583602964878082,\n",
|
806 |
+
" 0.33957740664482117,\n",
|
807 |
+
" -0.09169825166463852,\n",
|
808 |
+
" 0.24532632529735565,\n",
|
809 |
+
" 0.5283830165863037,\n",
|
810 |
+
" 0.7038718461990356,\n",
|
811 |
+
" 0.6268500089645386,\n",
|
812 |
+
" 0.00923143420368433,\n",
|
813 |
+
" 0.8284425139427185,\n",
|
814 |
+
" 0.6025779247283936,\n",
|
815 |
+
" 0.5495515465736389,\n",
|
816 |
+
" -0.34349843859672546,\n",
|
817 |
+
" 0.3288527727127075,\n",
|
818 |
+
" 0.1823807954788208,\n",
|
819 |
+
" 0.2601393759250641,\n",
|
820 |
+
" -0.01894410327076912,\n",
|
821 |
+
" 0.535849928855896,\n",
|
822 |
+
" -0.07729293406009674,\n",
|
823 |
+
" -0.05701117962598801,\n",
|
824 |
+
" -0.5398024320602417,\n",
|
825 |
+
" -0.2532539665699005,\n",
|
826 |
+
" -0.02206384763121605,\n",
|
827 |
+
" -0.5667169690132141,\n",
|
828 |
+
" -0.1217791885137558,\n",
|
829 |
+
" 0.37247171998023987,\n",
|
830 |
+
" -0.11095214635133743,\n",
|
831 |
+
" -0.615912914276123,\n",
|
832 |
+
" 0.32324957847595215,\n",
|
833 |
+
" 0.45441827178001404,\n",
|
834 |
+
" 0.23056231439113617,\n",
|
835 |
+
" -2.3405637741088867,\n",
|
836 |
+
" -0.3898467421531677,\n",
|
837 |
+
" -0.03767596557736397,\n",
|
838 |
+
" -0.17562665045261383,\n",
|
839 |
+
" 0.40651726722717285,\n",
|
840 |
+
" -0.45753777027130127,\n",
|
841 |
+
" 1.0350662469863892,\n",
|
842 |
+
" -0.45301544666290283,\n",
|
843 |
+
" 0.5571080446243286,\n",
|
844 |
+
" -0.7762919068336487,\n",
|
845 |
+
" -0.2582171857357025,\n",
|
846 |
+
" -0.8123776316642761,\n",
|
847 |
+
" 0.027839435264468193,\n",
|
848 |
+
" 0.021091900765895844,\n",
|
849 |
+
" -0.3034447133541107,\n",
|
850 |
+
" 0.34992972016334534,\n",
|
851 |
+
" -0.6623353958129883,\n",
|
852 |
+
" -0.2909213602542877,\n",
|
853 |
+
" -0.18953290581703186,\n",
|
854 |
+
" -0.5997650623321533,\n",
|
855 |
+
" 0.8640273213386536,\n",
|
856 |
+
" -0.24815954267978668,\n",
|
857 |
+
" -0.29709047079086304,\n",
|
858 |
+
" 0.8860780000686646,\n",
|
859 |
+
" 0.04529644176363945,\n",
|
860 |
+
" 1.1951236724853516,\n",
|
861 |
+
" -1.1161422729492188,\n",
|
862 |
+
" -0.04289549961686134,\n",
|
863 |
+
" -1.6880977153778076,\n",
|
864 |
+
" -0.16583313047885895,\n",
|
865 |
+
" -0.4640212059020996,\n",
|
866 |
+
" 0.03880169615149498,\n",
|
867 |
+
" -0.4149312973022461,\n",
|
868 |
+
" 0.5659136772155762,\n",
|
869 |
+
" -0.07184366881847382,\n",
|
870 |
+
" 0.6438769102096558,\n",
|
871 |
+
" -1.1572128534317017,\n",
|
872 |
+
" 0.32702523469924927,\n",
|
873 |
+
" 0.19401556253433228,\n",
|
874 |
+
" -0.36513882875442505,\n",
|
875 |
+
" -0.1496993601322174,\n",
|
876 |
+
" 0.5544662475585938,\n",
|
877 |
+
" -0.10601028800010681,\n",
|
878 |
+
" 0.2943094074726105,\n",
|
879 |
+
" -0.9837754368782043,\n",
|
880 |
+
" -0.14144904911518097,\n",
|
881 |
+
" 0.7259737253189087,\n",
|
882 |
+
" 0.05785682797431946,\n",
|
883 |
+
" 0.8584915995597839,\n",
|
884 |
+
" -0.27259302139282227,\n",
|
885 |
+
" -0.6073381900787354,\n",
|
886 |
+
" -0.22768571972846985,\n",
|
887 |
+
" 0.7255773544311523,\n",
|
888 |
+
" 0.1539279967546463,\n",
|
889 |
+
" -0.6805699467658997,\n",
|
890 |
+
" -1.0378549098968506,\n",
|
891 |
+
" -0.597703754901886,\n",
|
892 |
+
" -0.6462168097496033,\n",
|
893 |
+
" 1.1171226501464844,\n",
|
894 |
+
" -0.21000456809997559,\n",
|
895 |
+
" -0.7443035244941711,\n",
|
896 |
+
" -0.16614656150341034,\n",
|
897 |
+
" 0.03670107200741768,\n",
|
898 |
+
" 0.23261283338069916,\n",
|
899 |
+
" -0.5053027272224426,\n",
|
900 |
+
" -1.0062577724456787,\n",
|
901 |
+
" 0.028607431799173355,\n",
|
902 |
+
" 0.6196390986442566,\n",
|
903 |
+
" 0.11939772218465805,\n",
|
904 |
+
" 0.16041713953018188,\n",
|
905 |
+
" 0.012548833154141903,\n",
|
906 |
+
" -0.6940840482711792,\n",
|
907 |
+
" -1.0390965938568115,\n",
|
908 |
+
" 0.3209550082683563,\n",
|
909 |
+
" -0.5268062353134155,\n",
|
910 |
+
" 0.5799688696861267,\n",
|
911 |
+
" -0.3353428542613983,\n",
|
912 |
+
" -0.3517853319644928,\n",
|
913 |
+
" -0.38189470767974854,\n",
|
914 |
+
" 0.23297882080078125,\n",
|
915 |
+
" 0.045969072729349136,\n",
|
916 |
+
" 0.6408992409706116,\n",
|
917 |
+
" -0.23498287796974182,\n",
|
918 |
+
" -0.2744370400905609,\n",
|
919 |
+
" -0.3386567234992981,\n",
|
920 |
+
" 0.16898459196090698,\n",
|
921 |
+
" 0.4274075925350189,\n",
|
922 |
+
" -0.4734047055244446,\n",
|
923 |
+
" -0.02491043135523796,\n",
|
924 |
+
" -0.5023868680000305,\n",
|
925 |
+
" -0.1599859893321991,\n",
|
926 |
+
" -0.28793132305145264,\n",
|
927 |
+
" 0.45987895131111145,\n",
|
928 |
+
" 0.12111934274435043,\n",
|
929 |
+
" 0.695939838886261,\n",
|
930 |
+
" 0.18703705072402954,\n",
|
931 |
+
" 0.11010603606700897,\n",
|
932 |
+
" -0.0493675135076046,\n",
|
933 |
+
" 0.2681659758090973,\n",
|
934 |
+
" 0.6883248090744019,\n",
|
935 |
+
" 0.14249111711978912,\n",
|
936 |
+
" -0.3902900516986847,\n",
|
937 |
+
" 0.02434423565864563,\n",
|
938 |
+
" 0.8115938305854797,\n",
|
939 |
+
" 0.31366243958473206,\n",
|
940 |
+
" 0.1475793719291687,\n",
|
941 |
+
" 0.8607581853866577,\n",
|
942 |
+
" 1.106387972831726,\n",
|
943 |
+
" -0.12984894216060638,\n",
|
944 |
+
" 0.6475292444229126,\n",
|
945 |
+
" 0.4389672875404358,\n",
|
946 |
+
" -0.14565706253051758,\n",
|
947 |
+
" -0.29327720403671265,\n",
|
948 |
+
" 0.19903028011322021,\n",
|
949 |
+
" 0.44643306732177734,\n",
|
950 |
+
" -0.055179595947265625,\n",
|
951 |
+
" 8.315621376037598,\n",
|
952 |
+
" -0.08598960936069489,\n",
|
953 |
+
" 0.7728097438812256,\n",
|
954 |
+
" 0.1960563361644745,\n",
|
955 |
+
" 0.7582479119300842,\n",
|
956 |
+
" -0.6882674098014832,\n",
|
957 |
+
" -0.22637659311294556,\n",
|
958 |
+
" 0.5025527477264404,\n",
|
959 |
+
" -0.07177169620990753,\n",
|
960 |
+
" -0.03814778849482536,\n",
|
961 |
+
" 1.0206265449523926,\n",
|
962 |
+
" -0.4750046730041504,\n",
|
963 |
+
" 0.015179314650595188,\n",
|
964 |
+
" -0.6247814297676086,\n",
|
965 |
+
" 0.4034382998943329,\n",
|
966 |
+
" 1.700039029121399,\n",
|
967 |
+
" -0.30730658769607544,\n",
|
968 |
+
" 0.28762733936309814,\n",
|
969 |
+
" 0.63616544008255,\n",
|
970 |
+
" -0.23646242916584015,\n",
|
971 |
+
" 0.2806755304336548,\n",
|
972 |
+
" 0.4410918056964874,\n",
|
973 |
+
" 0.14614292979240417,\n",
|
974 |
+
" 0.4948270916938782,\n",
|
975 |
+
" 0.43732860684394836,\n",
|
976 |
+
" 1.0119167566299438,\n",
|
977 |
+
" 0.9210423827171326,\n",
|
978 |
+
" -0.35212814807891846,\n",
|
979 |
+
" 0.32403385639190674,\n",
|
980 |
+
" -0.44126105308532715,\n",
|
981 |
+
" -0.18103229999542236,\n",
|
982 |
+
" -0.31492364406585693,\n",
|
983 |
+
" -0.503863513469696,\n",
|
984 |
+
" -0.26293063163757324,\n",
|
985 |
+
" 0.21797089278697968,\n",
|
986 |
+
" -0.9694619178771973,\n",
|
987 |
+
" 0.021304313093423843,\n",
|
988 |
+
" 0.44222936034202576,\n",
|
989 |
+
" -0.36141523718833923,\n",
|
990 |
+
" -0.463960736989975,\n",
|
991 |
+
" -0.24528658390045166,\n",
|
992 |
+
" 0.11174631118774414,\n",
|
993 |
+
" 0.09441330283880234,\n",
|
994 |
+
" 0.18713852763175964,\n",
|
995 |
+
" 0.36507827043533325,\n",
|
996 |
+
" 0.7508949041366577,\n",
|
997 |
+
" -0.15697608888149261,\n",
|
998 |
+
" 0.4001035690307617,\n",
|
999 |
+
" 1.323508620262146,\n",
|
1000 |
+
" -0.20196901261806488,\n",
|
1001 |
+
" 0.292355477809906,\n",
|
1002 |
+
" 0.34666717052459717,\n",
|
1003 |
+
" -0.11999291181564331,\n",
|
1004 |
+
" -0.6510916352272034,\n",
|
1005 |
+
" 0.4462094306945801,\n",
|
1006 |
+
" -0.45647361874580383,\n",
|
1007 |
+
" -0.14198175072669983,\n",
|
1008 |
+
" -0.4045391082763672,\n",
|
1009 |
+
" 0.7035051584243774,\n",
|
1010 |
+
" 0.3213372826576233,\n",
|
1011 |
+
" 0.5096818804740906,\n",
|
1012 |
+
" 0.6800979971885681,\n",
|
1013 |
+
" -0.008764655329287052,\n",
|
1014 |
+
" -0.19463925063610077,\n",
|
1015 |
+
" -0.7179383635520935,\n",
|
1016 |
+
" 0.2567158043384552,\n",
|
1017 |
+
" 0.07364790141582489,\n",
|
1018 |
+
" -0.222466841340065,\n",
|
1019 |
+
" 0.022669780999422073,\n",
|
1020 |
+
" 0.8473037481307983,\n",
|
1021 |
+
" -0.034888043999671936,\n",
|
1022 |
+
" -0.07169658690690994,\n",
|
1023 |
+
" -0.05516548082232475,\n",
|
1024 |
+
" -0.06913617253303528,\n",
|
1025 |
+
" -0.530577540397644,\n",
|
1026 |
+
" -0.6640213131904602,\n",
|
1027 |
+
" -0.34023773670196533,\n",
|
1028 |
+
" -0.5658687949180603,\n",
|
1029 |
+
" -0.4476564824581146,\n",
|
1030 |
+
" -2.571279287338257,\n",
|
1031 |
+
" -0.12790530920028687,\n",
|
1032 |
+
" 0.9560791850090027,\n",
|
1033 |
+
" -0.6428014039993286,\n",
|
1034 |
+
" -0.4189566671848297,\n",
|
1035 |
+
" -0.20985344052314758,\n",
|
1036 |
+
" 0.47335946559906006,\n",
|
1037 |
+
" -0.11219882220029831,\n",
|
1038 |
+
" -0.10753587633371353,\n",
|
1039 |
+
" 0.14247222244739532,\n",
|
1040 |
+
" 1.059354305267334,\n",
|
1041 |
+
" 0.3302377462387085,\n",
|
1042 |
+
" -0.3935352563858032,\n",
|
1043 |
+
" -0.058758582919836044,\n",
|
1044 |
+
" 0.648691713809967,\n",
|
1045 |
+
" 0.30499130487442017,\n",
|
1046 |
+
" -0.27360308170318604,\n",
|
1047 |
+
" -0.25764214992523193,\n",
|
1048 |
+
" 0.015458552166819572,\n",
|
1049 |
+
" 0.6662879586219788,\n",
|
1050 |
+
" 0.3119010329246521,\n",
|
1051 |
+
" -0.15479373931884766,\n",
|
1052 |
+
" 0.028574924916028976,\n",
|
1053 |
+
" -0.1503346860408783,\n",
|
1054 |
+
" 0.06127818673849106,\n",
|
1055 |
+
" -0.0910576581954956,\n",
|
1056 |
+
" 0.0481022410094738,\n",
|
1057 |
+
" 0.9771047234535217,\n",
|
1058 |
+
" 0.7927762866020203,\n",
|
1059 |
+
" 0.023048892617225647,\n",
|
1060 |
+
" 0.30974704027175903,\n",
|
1061 |
+
" 0.33901262283325195,\n",
|
1062 |
+
" -0.07123278081417084,\n",
|
1063 |
+
" 0.34432730078697205,\n",
|
1064 |
+
" -0.12369780987501144,\n",
|
1065 |
+
" 0.2354590892791748,\n",
|
1066 |
+
" 0.38229313492774963,\n",
|
1067 |
+
" -0.8465576767921448,\n",
|
1068 |
+
" -0.2445705085992813,\n",
|
1069 |
+
" -0.16847288608551025,\n",
|
1070 |
+
" 0.5078030824661255,\n",
|
1071 |
+
" -0.4897501766681671,\n",
|
1072 |
+
" 0.07203903794288635,\n",
|
1073 |
+
" 0.6503809690475464,\n",
|
1074 |
+
" -0.08006825298070908]"
|
1075 |
+
]
|
1076 |
+
},
|
1077 |
+
"execution_count": 11,
|
1078 |
+
"metadata": {},
|
1079 |
+
"output_type": "execute_result"
|
1080 |
+
}
|
1081 |
+
],
|
1082 |
+
"source": [
|
1083 |
+
"# https://vipul-maheshwari.github.io/2024/03/03/multimodal-rag-application\n",
|
1084 |
+
"\n",
|
1085 |
+
"def embed_image(img):\n",
|
1086 |
+
" processed_image = preprocess(img)\n",
|
1087 |
+
" unsqueezed_image = processed_image.unsqueeze(0)\n",
|
1088 |
+
" embeddings = model.encode_image(unsqueezed_image)\n",
|
1089 |
+
" \n",
|
1090 |
+
" # Detach from the autograd graph, convert to a NumPy array, and extract the first element as a list\n",
|
1091 |
+
" result = embeddings.detach().numpy()[0].tolist()\n",
|
1092 |
+
" return result\n",
|
1093 |
+
"\n",
|
1094 |
+
"len(embed_image(image))"
|
1095 |
+
]
|
1096 |
+
},
|
1097 |
+
{
|
1098 |
+
"cell_type": "code",
|
1099 |
+
"execution_count": null,
|
1100 |
+
"metadata": {},
|
1101 |
+
"outputs": [],
|
1102 |
+
"source": [
|
1103 |
+
"def embed_txt(txt):\n",
|
1104 |
+
" tokenized_text = clip.tokenize([txt]).to(device)\n",
|
1105 |
+
" embeddings = model.encode_text(tokenized_text)\n",
|
1106 |
+
" \n",
|
1107 |
+
" # Detach, move to CPU, convert to numpy array, and extract the first element as a list\n",
|
1108 |
+
" result = embeddings.detach().cpu().numpy()[0].tolist()\n",
|
1109 |
+
" return result\n",
|
1110 |
+
"\n",
|
1111 |
+
"res = tbl.search(embed_txt(\"a road with a stop\")).limit(3).to_pandas()\n",
|
1112 |
+
"res"
|
1113 |
+
]
|
1114 |
+
},
|
1115 |
+
{
|
1116 |
+
"cell_type": "code",
|
1117 |
+
"execution_count": null,
|
1118 |
+
"metadata": {},
|
1119 |
+
"outputs": [],
|
1120 |
+
"source": [
|
1121 |
+
"# https://blog.lancedb.com/lancedb-polars-2d5eb32a8aa3/\n",
|
1122 |
+
"\n",
|
1123 |
+
"# https://github.com/lancedb/lancedb"
|
1124 |
+
]
|
1125 |
+
}
|
1126 |
+
],
|
1127 |
+
"metadata": {
|
1128 |
+
"kernelspec": {
|
1129 |
+
"display_name": "Python 3",
|
1130 |
+
"language": "python",
|
1131 |
+
"name": "python3"
|
1132 |
+
},
|
1133 |
+
"language_info": {
|
1134 |
+
"codemirror_mode": {
|
1135 |
+
"name": "ipython",
|
1136 |
+
"version": 3
|
1137 |
+
},
|
1138 |
+
"file_extension": ".py",
|
1139 |
+
"mimetype": "text/x-python",
|
1140 |
+
"name": "python",
|
1141 |
+
"nbconvert_exporter": "python",
|
1142 |
+
"pygments_lexer": "ipython3",
|
1143 |
+
"version": "3.11.9"
|
1144 |
+
}
|
1145 |
+
},
|
1146 |
+
"nbformat": 4,
|
1147 |
+
"nbformat_minor": 2
|
1148 |
+
}
|
notebook_3.ipynb
ADDED
The diff for this file is too large to render.
See raw diff
|
|
notebook_4.ipynb
ADDED
The diff for this file is too large to render.
See raw diff
|
|
poetry.lock
ADDED
The diff for this file is too large to render.
See raw diff
|
|
pyproject.toml
ADDED
@@ -0,0 +1,29 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
[tool.poetry]
|
2 |
+
name = "grandtheftauto-multimodal-rag-application"
|
3 |
+
version = "0.1.0"
|
4 |
+
description = ""
|
5 |
+
authors = ["henryhyunwookim <[email protected]>"]
|
6 |
+
readme = "README.md"
|
7 |
+
|
8 |
+
[tool.poetry.dependencies]
|
9 |
+
python = "^3.11"
|
10 |
+
pillow = "^10.3.0"
|
11 |
+
datasets = "^2.19.0"
|
12 |
+
ipykernel = "^6.29.4"
|
13 |
+
jupyter = "^1.0.0"
|
14 |
+
ipywidgets = "^8.1.2"
|
15 |
+
matplotlib = "^3.8.4"
|
16 |
+
sentence-transformers = "^2.7.0"
|
17 |
+
lancedb = "^0.6.11"
|
18 |
+
torch = "^2.3.0"
|
19 |
+
clip = {git = "https://github.com/openai/CLIP.git"}
|
20 |
+
chromadb = "^0.5.0"
|
21 |
+
gradio = "^4.32.0"
|
22 |
+
|
23 |
+
|
24 |
+
[tool.poetry.group.dev.dependencies]
|
25 |
+
ipykernel = "^6.29.4"
|
26 |
+
|
27 |
+
[build-system]
|
28 |
+
requires = ["poetry-core"]
|
29 |
+
build-backend = "poetry.core.masonry.api"
|
requirements.txt
ADDED
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
pillow==10.3.0
|
2 |
+
datasets==2.19.0
|
3 |
+
ipykernel==6.29.4
|
4 |
+
jupyter==1.0.0
|
5 |
+
ipywidgets==8.1.2
|
6 |
+
matplotlib==3.8.4
|
7 |
+
sentence-transformers==2.7.0
|
8 |
+
lancedb==0.6.11
|
9 |
+
torch==2.3.0
|
10 |
+
clip @ git+https://github.com/openai/CLIP.git
|
11 |
+
chromadb==0.5.0
|
12 |
+
gradio==4.32.0
|
13 |
+
|
14 |
+
# Development dependencies
|
15 |
+
ipykernel==6.29.4
|
utils/__pycache__/utils.cpython-311.pyc
ADDED
Binary file (8.03 kB). View file
|
|
utils/utils.py
ADDED
@@ -0,0 +1,154 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
import logging
|
3 |
+
from datetime import datetime
|
4 |
+
from pathlib import Path
|
5 |
+
import pickle
|
6 |
+
from tqdm import tqdm
|
7 |
+
from datasets import load_dataset
|
8 |
+
import chromadb
|
9 |
+
import matplotlib.pyplot as plt
|
10 |
+
|
11 |
+
|
12 |
+
def set_directories():
    """Ensure the data and vector-store folders exist under the working directory.

    The folders ``data``, ``vectore_storage`` and ``vectore_storage/chroma``
    are created relative to the current working directory when missing.

    Returns:
        tuple: ``(data_pickle_path, chroma_dir)`` as ``pathlib.Path`` objects,
        i.e. ``<cwd>/data/data_set.pkl`` and ``<cwd>/vectore_storage/chroma``.
    """
    base_dir = Path(os.getcwd())

    data_dir = base_dir / 'data'
    # NOTE: the 'vectore_storage' spelling is the real on-disk folder name
    # (the repository ships vectore_storage/chroma/chroma.sqlite3); keep it.
    vectordb_dir = base_dir / 'vectore_storage'
    chroma_dir = vectordb_dir / 'chroma'

    for folder in (data_dir, vectordb_dir, chroma_dir):
        # exist_ok avoids the check-then-create race; parents=True keeps the
        # call valid even if a parent folder is missing.
        folder.mkdir(parents=True, exist_ok=True)

    return data_dir / 'data_set.pkl', chroma_dir
|
26 |
+
|
27 |
+
|
28 |
+
def load_data(data_pickle_path, dataset="vipulmaheshwari/GTA-Image-Captioning-Dataset"):
    """Return the data set, using a local pickle file as a cache.

    Args:
        data_pickle_path: Path (``str`` or ``pathlib.Path``) of the pickle cache.
        dataset: Name passed to ``datasets.load_dataset`` when no cache exists.

    Returns:
        The object unpickled from ``data_pickle_path`` if that file exists,
        otherwise the object returned by ``load_dataset(dataset)``.
    """
    if os.path.exists(data_pickle_path):
        print("Data set already exists in the local drive. Loading it.")
        # NOTE(security): unpickling can execute arbitrary code. This is only
        # safe as long as the cache file was produced by this function.
        with open(data_pickle_path, 'rb') as infile:
            return pickle.load(infile)

    print("Data set hasn't been loaded. Loading from the datasets library and save it as a pickle.")
    data_set = load_dataset(dataset)

    # Write to a sibling temp file and rename it into place, so an interrupted
    # dump never leaves a truncated file that later runs would treat as a
    # valid cache. A stale temp file is simply overwritten on the next run.
    tmp_path = f"{data_pickle_path}.tmp"
    with open(tmp_path, 'wb') as outfile:
        pickle.dump(data_set, outfile)
    os.replace(tmp_path, data_pickle_path)

    return data_set
|
40 |
+
|
41 |
+
|
42 |
+
def get_embeddings(data, model):
    """Encode every item of ``data`` and build matching string ids.

    Args:
        data: Sized sequence of items (images, per the caller) to encode.
        model: Object whose ``encode(item)`` result exposes ``tolist()``.

    Returns:
        tuple: ``(ids, embeddings)`` where ``ids[i]`` is ``"image <i>"`` and
        ``embeddings[i]`` is ``model.encode(data[i]).tolist()``.
    """
    ids = []
    embeddings = []
    # enumerate() yields no length, so pass total= explicitly to let tqdm
    # report progress against the known number of items.
    for index, image in tqdm(enumerate(data), total=len(data)):
        ids.append(f"image {index}")
        embeddings.append(model.encode(image).tolist())

    return ids, embeddings
|
53 |
+
|
54 |
+
|
55 |
+
def get_collection(chroma_dir, model, collection_name, data):
    """Open (or create) a persistent Chroma collection and fill it if needed.

    Args:
        chroma_dir: Directory used as the Chroma persistence path.
        model: Encoder forwarded to ``get_embeddings``.
        collection_name: Name of the collection to get or create.
        data: Sized sequence of items to embed.

    Returns:
        The Chroma collection. Embeddings are computed and added only when the
        collection's record count differs from ``len(data)``.
    """
    store = chromadb.PersistentClient(path=str(chroma_dir))
    collection = store.get_or_create_collection(name=collection_name)

    # A matching record count is taken to mean the collection is populated.
    if collection.count() == len(data):
        print("Embeddings are already added to the collection.")
        return collection

    print("Adding embeddings to the collection.")
    ids, embeddings = get_embeddings(data, model)
    collection.add(ids=ids, embeddings=embeddings)
    return collection
|
70 |
+
|
71 |
+
|
72 |
+
def get_result(collection, data_set, query, model, n_results=2):
    """Return the image and caption that best match a text query.

    Args:
        collection: Vector store exposing ``query(query_embeddings=, n_results=)``.
        data_set: Mapping with ``data_set['train']['image']`` and
            ``data_set['train']['text']`` indexable by integer position.
        query: Text query; encoded with ``model.encode([query])``.
        model: Encoder used to embed the query.
        n_results: Number of neighbours requested from the vector store.
            Only the top-ranked hit is returned.

    Returns:
        tuple: ``(image, text)`` of the top-ranked hit.
    """
    # Query the vector store, honouring the caller's n_results
    # (previously hard-coded to 2 regardless of the argument).
    results = collection.query(
        query_embeddings=model.encode([query]),
        n_results=n_results
    )

    # Ids are stored as "image <index>"; recover the integer index of the
    # best match (first hit of the first, and only, query).
    top_id = results['ids'][0][0]
    img_id = int(top_id.split('image ')[-1])

    # Get the image and its caption
    train_split = data_set['train']
    image = train_split['image'][img_id]
    text = train_split['text'][img_id]

    return image, text
|
87 |
+
|
88 |
+
|
89 |
+
def show_image(image, text, query):
    """Display ``image`` with matplotlib, then print the query and caption.

    Args:
        image: Object accepted by ``plt.imshow``.
        text: Original description printed below the query.
        query: User query printed first.
    """
    # Interactive mode on, axes hidden, then draw and show the image.
    plt.ion()
    plt.axis("off")
    plt.imshow(image)
    plt.show()

    summary = (
        f"User query: {query}",
        f"Original description: {text}\n",
    )
    for line in summary:
        print(line)
|
96 |
+
|
97 |
+
|
98 |
+
def get_logger():
    """Configure file logging under ``./log/`` and return this module's logger.

    The log file is named ``<YYYYMMDD>.log`` using the current UTC date. The
    ``./log/`` directory is created when missing.

    Returns:
        logging.Logger: ``logging.getLogger(__name__)``.
    """
    # Local import: the file header only imports ``datetime`` from datetime.
    from datetime import timezone

    log_path = "./log/"
    # exist_ok avoids the exists()/mkdir() race when started concurrently.
    os.makedirs(log_path, exist_ok=True)

    # Timezone-aware replacement for the deprecated datetime.utcnow();
    # the formatted UTC date is identical.
    cur_date = datetime.now(timezone.utc).strftime("%Y%m%d")
    log_filename = f"{log_path}{cur_date}.log"

    logging.basicConfig(
        filename=log_filename,
        level=logging.INFO,
        format="%(asctime)s %(levelname)-8s %(message)s",
        datefmt="%Y-%m-%d %H:%M:%S")

    return logging.getLogger(__name__)
|
115 |
+
|
116 |
+
|
117 |
+
def initialization(logger):
    """Prepare everything needed for search and report progress.

    Steps, in order: create directories, load the data set, load the CLIP
    model, and build or open the Chroma collection of image embeddings. Each
    progress message is printed and also logged at INFO level.

    Args:
        logger: Logger receiving the progress messages.

    Returns:
        tuple: ``(collection, data_set, model, logger)``.
    """
    def _announce(message):
        # Mirror one progress message to stdout and to the logger.
        print(message)
        logger.info(message)

    separator = "-------------------------------------------------------"

    _announce("Initializing...")
    _announce(separator)

    _announce("Importing functions...")
    # Imported here, as before, so the message above is emitted before the
    # import runs. The sibling helpers (set_directories, load_data,
    # get_collection) live in this same module, so the former
    # `from utils.utils import ...` self-import was dropped: it was redundant
    # and fails when the module is loaded under another import path.
    from sentence_transformers import SentenceTransformer

    _announce("Set directories...")
    data_pickle_path, chroma_dir = set_directories()

    _announce("Loading data...")
    data_set = load_data(data_pickle_path)

    _announce("Loading CLIP model...")
    model = SentenceTransformer("sentence-transformers/clip-ViT-L-14")

    _announce("Getting vector embeddings...")
    collection = get_collection(
        chroma_dir,
        model,
        collection_name='image_vectors',
        data=data_set['train']['image'],
    )

    _announce(separator)
    _announce("Initialization completed! Ready for search.")

    return collection, data_set, model, logger
|
vectore_storage/chroma/chroma.sqlite3
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:34a5e8e1ac1cff55f102ec9eeb2fb556494f2d1d5c496e76641d5f4aab4feda5
|
3 |
+
size 3473408
|