Trent committed
Commit • cf349fd
1 Parent(s): 2cd1913

Text to image Search Engine demo

Changed files:
- app.py +1 -1
- requirements.txt +3 -1
- text2image.py +34 -4
- utils.py +17 -0
app.py
CHANGED
@@ -10,4 +10,4 @@ st.sidebar.title("Navigation")
 model = st.sidebar.selectbox("Choose a model", ["koclip", "koclip-large"])
 page = st.sidebar.selectbox("Choose a task", list(PAGES.keys()))
 
-PAGES[page].app()
+PAGES[page].app(model)
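For context, a minimal sketch of how app.py plausibly wires this together. Only the two selectbox lines and the PAGES[page].app(model) call appear in this diff; the PAGES registry shape and the import are illustrative assumptions, not part of the commit.

# Hypothetical app.py skeleton around the diffed lines; the PAGES registry
# and the text2image import are assumptions for illustration only.
import streamlit as st

import text2image  # page module exposing app(model_name), added in this commit

# Sidebar task names map to page modules that each expose app(model_name).
PAGES = {
    "Text to Image": text2image,
}

st.sidebar.title("Navigation")
model = st.sidebar.selectbox("Choose a model", ["koclip", "koclip-large"])
page = st.sidebar.selectbox("Choose a task", list(PAGES.keys()))

# The one-line change in this commit: the selected model name is now
# forwarded, so the page can load the matching feature index and checkpoint.
PAGES[page].app(model)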
requirements.txt
CHANGED
@@ -3,4 +3,6 @@ jaxlib
 flax
 transformers
 streamlit
-tqdm
+tqdm
+nmslib
+matplotlib
text2image.py
CHANGED
@@ -1,14 +1,44 @@
+import os
+
 import streamlit as st
 
-from utils import load_model
+from utils import load_model, load_index
+import numpy as np
+import matplotlib.pyplot as plt
 
 
 def app(model_name):
-
+    images_directory = 'images/val2017'
+    features_directory = f'features/val2017/{model_name}.tsv'
 
+    files, index = load_index(features_directory)
+    model, processor = load_model(f'koclip/{model_name}')
 
-    st.title("Text to Image")
+    st.title("Text to Image Search Engine")
     st.markdown("""
-
+    This demonstration explores the capability of KoCLIP as a Korean-language image search engine. Embeddings for each of the
+    5,000 images in the [MSCOCO](https://cocodataset.org/#home) 2017 validation set were generated with the trained KoCLIP
+    vision model. They are ranked by cosine similarity to the input text query embedding, and the top 10 images
+    are displayed below.
+
+    KoCLIP is a retraining of OpenAI's CLIP model on 82,783 images from the [MSCOCO](https://cocodataset.org/#home) dataset and
+    their Korean caption annotations. The Korean translations of the captions were obtained from [AI Hub](https://aihub.or.kr/keti_data_board/visual_intelligence).
+
+    Example queries: 아파트(Apartment), 자동차(Car), 컴퓨터(Computer)
     """)
 
+    query = st.text_input("한글 질문을 적어주세요 (Korean Text Query) :", value="아파트")
+    if st.button("질문 (Query)"):
+        proc = processor(text=[query], images=None, return_tensors="jax", padding=True)
+        vec = np.asarray(model.get_text_features(**proc))
+        ids, dists = index.knnQuery(vec, k=10)
+        result_files = map(lambda id: files[id], ids)
+        result_imgs, result_captions = [], []
+        for file, dist in zip(result_files, dists):
+            result_imgs.append(plt.imread(os.path.join(images_directory, file)))
+            result_captions.append("{:s} (유사도: {:.3f})".format(file, 1.0 - dist))
+
+        st.image(result_imgs[:3], caption=result_captions[:3], width=200)
+        st.image(result_imgs[3:6], caption=result_captions[3:6], width=200)
+        st.image(result_imgs[6:9], caption=result_captions[6:9], width=200)
+        st.image(result_imgs[9:], caption=result_captions[9:], width=200)
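One detail worth calling out in the query path above: nmslib's 'cosinesimil' space returns cosine distance, which is why the caption reports 1.0 - dist as the similarity score. Below is a self-contained sketch of that round trip, with random vectors standing in for KoCLIP embeddings; the shapes are illustrative assumptions.

# Standalone sketch of the knnQuery round trip from text2image.py; random
# vectors stand in for the KoCLIP image and text features.
import nmslib
import numpy as np

rng = np.random.default_rng(0)
image_feats = rng.normal(size=(5000, 512)).astype(np.float32)  # assumed shape

index = nmslib.init(method="hnsw", space="cosinesimil")
index.addDataPointBatch(image_feats)
index.createIndex({"post": 2})

text_feat = rng.normal(size=(512,)).astype(np.float32)  # stand-in for get_text_features output
ids, dists = index.knnQuery(text_feat, k=10)

for i, dist in zip(ids, dists):
    # 'cosinesimil' yields cosine *distance*; the app displays 1 - distance
    # as the similarity shown in each image caption.
    print(f"image {i}: similarity {1.0 - dist:.3f}")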
utils.py
CHANGED
@@ -1,8 +1,25 @@
+import nmslib
 import streamlit as st
 from transformers import CLIPProcessor, AutoTokenizer, ViTFeatureExtractor
+import numpy as np
 
 from koclip import FlaxHybridCLIP
 
+@st.cache(allow_output_mutation=True)
+def load_index(img_file):
+    filenames, embeddings = [], []
+    lines = open(img_file, "r")
+    for line in lines:
+        cols = line.strip().split('\t')
+        filename = cols[0]
+        embedding = np.array([float(x) for x in cols[1].split(',')])
+        filenames.append(filename)
+        embeddings.append(embedding)
+    embeddings = np.array(embeddings)
+    index = nmslib.init(method='hnsw', space='cosinesimil')
+    index.addDataPointBatch(embeddings)
+    index.createIndex({'post': 2}, print_progress=True)
+    return filenames, index
 
 @st.cache(allow_output_mutation=True)
 def load_model(model_name="koclip/koclip"):
|