Image-Processsing committed on
Commit
85b6267
1 Parent(s): 7799bd8

Upload 18 files

Browse files
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ yamnet_saved_model/variables/variables.data-00000-of-00001 filter=lfs diff=lfs merge=lfs -text
Dockerfile ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ FROM python:3.9
2
+
3
+ WORKDIR /code
4
+
5
+ COPY ./requirements.txt /code/requirements.txt
6
+
7
+ RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt
8
+
9
+ COPY . .
10
+
11
+ CMD ["gunicorn", "-b", "0.0.0.0:7860", "app:app"]
app.py ADDED
@@ -0,0 +1,346 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import moviepy.editor as mp
2
+ from flask import Flask, request, jsonify
3
+ from flask_cors import CORS
4
+ import requests
5
+ from io import BytesIO
6
+ import speech_recognition as sr
7
+ import io
8
+ import fitz
9
+ import numpy as np
10
+ import cv2
11
+ from flask_caching import Cache
12
+
13
+ from utils.audioEmbedding.index import extract_audio_embeddings
14
+ from utils.videoEmbedding.index import get_video_embedding
15
+ from utils.imageToText.index import extract_text
16
+ from utils.sentanceEmbedding.index import get_text_vector , get_text_discription_vector
17
+ from utils.imageEmbedding.index import get_image_embedding
18
+ from utils.similarityScore import get_all_similarities
19
+ from utils.objectDetection.index import detect_objects
20
+
21
+ app = Flask(__name__)
22
+ cache = Cache(app, config={'CACHE_TYPE': 'simple'}) # You can choose a caching type based on your requirements
23
+ CORS(app)
24
+ import moviepy.editor as mp
25
+ import tempfile
26
+
27
def get_face_locations(binary_data):
    """Detect frontal faces in an encoded image.

    Args:
        binary_data: Raw bytes of an encoded image, decoded with
            ``cv2.imdecode``.

    Returns:
        A list with one ``{"top", "right", "bottom", "left"}`` dict per
        detected face. The values are plain Python ints so the result can be
        serialised without depending on numpy scalar types.

    Raises:
        ValueError: If the bytes cannot be decoded as an image.
    """
    pixels = np.frombuffer(binary_data, np.uint8)
    image = cv2.imdecode(pixels, cv2.IMREAD_COLOR)
    if image is None:
        # imdecode reports an undecodable buffer by returning None; fail here
        # with a clear message instead of inside cvtColor.
        raise ValueError("Could not decode image data")

    # Haar cascade for frontal faces, loaded from the OpenCV data directory.
    face_cascade = cv2.CascadeClassifier(cv2.data.haarcascades + 'haarcascade_frontalface_default.xml')

    gray_image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    faces = face_cascade.detectMultiScale(gray_image, scaleFactor=1.1, minNeighbors=5, minSize=(30, 30))

    return [
        {
            "top": int(y),
            "right": int(x + w),
            "bottom": int(y + h),
            "left": int(x),
        }
        for (x, y, w, h) in faces
    ]
49
+
50
def seperate_image_text_from_pdf(pdf_url):
    """Download a PDF and split every page into its text and embedded images.

    The document is opened directly from the downloaded bytes, so no temporary
    file is created and nothing is left behind if parsing fails.

    Args:
        pdf_url: URL the PDF is fetched from with ``requests.get``.

    Returns:
        A list with one dict per page:
        ``{"pgno": <1-based page number>, "images": [<bytes>, ...], "text": <str>}``.
        The list is empty when the download does not return HTTP 200.
    """
    pages_info = []

    response = requests.get(pdf_url)
    if response.status_code != 200:
        print("Failed to fetch the PDF from the URL.")
        return pages_info

    pdf = fitz.open(stream=response.content, filetype="pdf")
    try:
        for page_num in range(len(pdf)):
            page = pdf.load_page(page_num)

            # The first field of each image entry is the xref used to pull
            # the raw image bytes out of the document.
            images_bytes = [
                pdf.extract_image(img_info[0])["image"]
                for img_info in page.get_images(full=True)
            ]

            pages_info.append({
                "pgno": page_num + 1,
                "images": images_bytes,
                "text": page.get_text(),
            })
    finally:
        # Always release the document, even if a page fails to parse.
        pdf.close()

    return pages_info
104
+
105
def pdf_image_text_embedding_and_text_embedding(pages_info):
    """Attach image embeddings and OCR text to each extracted PDF page.

    Every image of every page is posted to the remote
    ``extract_image_text_and_embedding_binary_data`` endpoint. Images that the
    endpoint rejects, or that raise while being processed, are skipped.

    Args:
        pages_info: Iterable of dicts with at least the keys ``"text"`` and
            ``"images"`` (a list of raw image bytes).

    Returns:
        A list with one ``{"images": [...], "text": <str>}`` dict per page,
        where each image entry is
        ``{"image_embedding": ..., "extracted_text": ...}``.
        The string ``"Error"`` is returned if a page is malformed or any other
        unexpected failure occurs (kept for backward compatibility).
    """
    try:
        page_embeddings = []

        for page in pages_info:
            text = page["text"]
            images = page["images"]

            image_embeddings = []
            for image in images:
                try:
                    response = requests.post('https://imageprocessing-backend.hf.space/extract_image_text_and_embedding_binary_data', data=image)
                    if response.status_code != 200:
                        # Log the size and status only; printing the raw bytes
                        # floods the log with binary data.
                        print(f"Failed to process image ({len(image)} bytes): HTTP {response.status_code}")
                        continue

                    result = response.json()
                    image_embeddings.append({
                        "image_embedding": result.get("image_embedding"),
                        "extracted_text": result.get("extracted_text"),
                    })
                except Exception as e:
                    # Best effort: one bad image must not abort the whole page,
                    # but the cause should be visible in the log.
                    print(f"Error while processing image: {e}")

            page_embeddings.append({
                "images": image_embeddings,
                "text": text,
            })

        return page_embeddings
    except Exception as e:
        print("An error occurred:", e)
        return "Error"
155
+
156
+
157
def separate_audio_from_video(video_url):
    """Extract the audio track of a video as WAV bytes.

    Args:
        video_url: Location of the video, passed to ``mp.VideoFileClip``.

    Returns:
        The bytes of the WAV file written from the audio track, or ``None`` if
        anything fails (including a video without an audio track). The failure
        is printed.
    """
    import os

    video = None
    temp_audio_filename = None
    try:
        video = mp.VideoFileClip(video_url)
        audio = video.audio
        if audio is None:
            raise ValueError("Video has no audio track")

        # Reserve a file name only; the audio is written after the handle
        # is closed.
        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_audio_file:
            temp_audio_filename = temp_audio_file.name

        audio.write_audiofile(temp_audio_filename)

        with open(temp_audio_filename, "rb") as f:
            return f.read()

    except Exception as e:
        print("An error occurred:", e)
        return None
    finally:
        # Release the clip and remove the temporary WAV so repeated requests
        # do not accumulate open readers or files on disk.
        if video is not None:
            video.close()
        if temp_audio_filename is not None and os.path.exists(temp_audio_filename):
            os.unlink(temp_audio_filename)
180
+
181
+
182
+
183
+
184
# NOTE: the former @cache.cached decorator was removed. Placed above
# @app.route it never wrapped the registered view, and the default
# path-based cache key would ignore the POST body anyway.
@app.route('/get_text_embedding', methods=['POST'])
def get_text_embedding_route():
    """Return the averaged word-vector embedding for the posted text.

    Expects a JSON body ``{"text": <str>}``.

    Returns:
        200 with ``{"text_embedding": ...}`` (whatever ``get_text_vector``
        returns), 400 if ``text`` is missing or not a string, 500 with
        ``{"error": ...}`` on any other failure.
    """
    try:
        payload = request.get_json(silent=True) or {}
        text = payload.get("text")
        if not isinstance(text, str):
            return jsonify({"error": "'text' must be a string"}), 400

        text_embedding = get_text_vector(text)
        return jsonify({"text_embedding": text_embedding}), 200

    except Exception as e:
        return jsonify({"error": str(e)}), 500
194
+
195
+
196
# NOTE: the former @cache.cached decorator was removed. Placed above
# @app.route it never wrapped the registered view, and the default
# path-based cache key would ignore the POST body anyway.
@app.route('/extract_audio_text_and_embedding', methods=['POST'])
def get_audio_embedding_route():
    """Download an audio file and return its embedding and transcription.

    Expects a JSON body ``{"audio_url": <str>}``.

    Returns:
        200 with ``{"extracted_text": <str>, "audio_embedding": ...}``; the
        text is empty when speech recognition fails. 400 if ``audio_url`` is
        missing, 500 with ``{"error": ...}`` if the download or processing
        fails.
    """
    try:
        payload = request.get_json(silent=True) or {}
        audio_url = payload.get('audio_url')
        if not audio_url:
            return jsonify({"error": "'audio_url' is required"}), 400

        response = requests.get(audio_url)
        if response.status_code != 200:
            return jsonify({"error": "Failed to download audio"}), 500
        audio_bytes = response.content

        audio_embedding_list = extract_audio_embeddings(audio_bytes)

        recognizer = sr.Recognizer()
        with sr.AudioFile(BytesIO(audio_bytes)) as source:
            recorded_audio = recognizer.record(source)

        # Transcription is best effort: a recognition failure still returns
        # the embedding with an empty transcript.
        extracted_text = ""
        try:
            extracted_text = recognizer.recognize_google(recorded_audio)
        except Exception as e:
            print(e)

        return jsonify({"extracted_text": extracted_text, "audio_embedding": audio_embedding_list}), 200

    except Exception as e:
        print(e)
        return jsonify({"error": str(e)}), 500
216
+
217
+ # Route to get image embeddings
218
# NOTE: the former @cache.cached decorator was removed. Placed above
# @app.route it never wrapped the registered view, and the default
# path-based cache key would ignore the POST body anyway.
@app.route('/extract_image_text_and_embedding', methods=['POST'])
def get_image_embedding_route():
    """Download an image and return its embedding and OCR text.

    Expects a JSON body ``{"imageUrl": <str>}``.

    Returns:
        200 with ``{"image_embedding": [...], "extracted_text": <str>}``,
        400 if ``imageUrl`` is missing, 500 with ``{"error": ...}`` if the
        download or processing fails.
    """
    try:
        payload = request.get_json(silent=True) or {}
        image_url = payload.get("imageUrl")
        if not image_url:
            return jsonify({"error": "'imageUrl' is required"}), 400

        response = requests.get(image_url)
        if response.status_code != 200:
            return jsonify({"error": "Failed to download image"}), 500

        binary_data = response.content
        extracted_text = extract_text(binary_data)
        image_embedding_list = get_image_embedding(binary_data).tolist()
        return jsonify({"image_embedding": image_embedding_list,"extracted_text":extracted_text}), 200

    except Exception as e:
        return jsonify({"error": str(e)}), 500
235
+
236
+ # Route to get video embeddings
237
# NOTE: the former @cache.cached decorator was removed. Placed above
# @app.route it never wrapped the registered view, and the default
# path-based cache key would ignore the POST body anyway.
@app.route('/extract_video_text_and_embedding', methods=['POST'])
def get_video_embedding_route():
    """Return frame embeddings, audio embedding and transcript for a video.

    Expects a JSON body ``{"videoUrl": <str>}``.

    Returns:
        200 with ``{"video_embedding": ..., "extracted_audio_text": <str>,
        "audio_embedding": ...}``; the transcript is empty when speech
        recognition fails. 400 if ``videoUrl`` is missing, 500 with
        ``{"error": ...}`` on any other failure.
    """
    try:
        payload = request.get_json(silent=True) or {}
        video_url = payload.get("videoUrl")
        if not video_url:
            return jsonify({"error": "'videoUrl' is required"}), 400

        audio_bytes = separate_audio_from_video(video_url)
        if audio_bytes is None:
            # The helper prints the cause and returns None on failure; report
            # that explicitly instead of failing later on a None value.
            return jsonify({"error": "Failed to extract audio from video"}), 500

        audio_embedding_list = extract_audio_embeddings(audio_bytes)

        recognizer = sr.Recognizer()
        with sr.AudioFile(io.BytesIO(audio_bytes)) as source:
            recorded_audio = recognizer.record(source)

        # Transcription is best effort.
        extracted_text = ""
        try:
            extracted_text = recognizer.recognize_google(recorded_audio)
        except Exception as e:
            print(e)

        video_embedding = get_video_embedding(video_url)
        return jsonify({"video_embedding": video_embedding,"extracted_audio_text": extracted_text, "audio_embedding": audio_embedding_list}), 200

    except Exception as e:
        print(e)
        return jsonify({"error": str(e)}), 500
261
+
262
# NOTE: the former @cache.cached decorator was removed. Placed above
# @app.route it never wrapped the registered view, and the default
# path-based cache key would ignore the POST body anyway.
@app.route('/extract_pdf_text_and_embedding', methods=['POST'])
def extract_pdf_text_and_embedding():
    """Return per-page text and image embeddings for a PDF.

    Expects a JSON body ``{"pdfUrl": <str>}``.

    Returns:
        200 with ``{"content": [...]}``, 400 if ``pdfUrl`` is missing, 500
        with ``{"error": ...}`` if processing fails.
    """
    try:
        payload = request.get_json(silent=True) or {}
        pdf_url = payload.get("pdfUrl")
        if not pdf_url:
            return jsonify({"error": "'pdfUrl' is required"}), 400

        pages_info = seperate_image_text_from_pdf(pdf_url)
        content = pdf_image_text_embedding_and_text_embedding(pages_info)
        if content == "Error":
            # The helper signals failure with the sentinel string "Error";
            # do not hand that to the client as a successful result.
            return jsonify({"error": "Failed to process PDF content"}), 500
        return jsonify({"content": content}), 200

    except Exception as e:
        return jsonify({"error": str(e)}), 500
276
+
277
+ # Route to get text description embeddings
278
# NOTE: the former @cache.cached decorator was removed. Placed above
# @app.route it never wrapped the registered view, and the default
# path-based cache key would ignore the POST body anyway.
@app.route('/getTextDescriptionEmbedding', methods=['POST'])
def get_text_description_embedding_route():
    """Return the description embedding for the posted text.

    Expects a JSON body ``{"text": <str>}``.

    Returns:
        200 with ``{"text_description_embedding": [...]}``, 400 if ``text`` is
        missing or not a string, 500 with ``{"error": ...}`` on failure.
    """
    try:
        payload = request.get_json(silent=True) or {}
        text = payload.get("text")
        if not isinstance(text, str):
            return jsonify({"error": "'text' must be a string"}), 400

        text_description_embedding = get_text_discription_vector(text)
        return jsonify({"text_description_embedding": text_description_embedding.tolist()}), 200

    except Exception as e:
        return jsonify({"error": str(e)}), 500
288
+
289
+
290
+
291
+ # Route to get object detection results
292
# NOTE: the former @cache.cached decorator was removed. Placed above
# @app.route it never wrapped the registered view, and the default
# path-based cache key would ignore the POST body anyway.
@app.route('/detectObjects', methods=['POST'])
def detect_objects_route():
    """Download an image and return the object-detection results for it.

    Expects a JSON body ``{"imageUrl": <str>}``.

    Returns:
        200 with ``{"object_detection_results": ...}``, 400 if ``imageUrl`` is
        missing, 500 with ``{"error": ...}`` if the download or detection
        fails.
    """
    try:
        payload = request.get_json(silent=True) or {}
        image_url = payload.get("imageUrl")
        if not image_url:
            return jsonify({"error": "'imageUrl' is required"}), 400

        response = requests.get(image_url)
        if response.status_code != 200:
            return jsonify({"error": "Failed to download image"}), 500

        object_detection_results = detect_objects(response.content)
        return jsonify({"object_detection_results": object_detection_results}), 200

    except Exception as e:
        return jsonify({"error": str(e)}), 500
306
+
307
+ # Route to get face locations
308
# NOTE: the former @cache.cached decorator was removed. Placed above
# @app.route it never wrapped the registered view, and the default
# path-based cache key would ignore the POST body anyway.
@app.route('/getFaceLocations', methods=['POST'])
def get_face_locations_route():
    """Download an image and return the locations of the faces found in it.

    Expects a JSON body ``{"imageUrl": <str>}``.

    Returns:
        200 with ``{"face_locations": <str>}``, 400 if ``imageUrl`` is
        missing, 500 with ``{"error": ...}`` if the download or detection
        fails.
    """
    try:
        payload = request.get_json(silent=True) or {}
        image_url = payload.get("imageUrl")
        if not image_url:
            return jsonify({"error": "'imageUrl' is required"}), 400

        response = requests.get(image_url)
        if response.status_code != 200:
            return jsonify({"error": "Failed to download image"}), 500

        face_locations = get_face_locations(response.content)
        # The list is deliberately returned as its string form: existing
        # clients receive this field as a string.
        return jsonify({"face_locations": str(face_locations)}), 200

    except Exception as e:
        print(e)
        return jsonify({"error": str(e)}), 500
327
+
328
+ # Route to get similarity score
329
# NOTE: the former @cache.cached decorator was removed. Placed above
# @app.route it never wrapped the registered view, and the default
# path-based cache key would ignore the POST body anyway.
@app.route('/getSimilarityScore', methods=['POST'])
def get_similarity_score_route():
    """Return the similarity metrics between two posted embeddings.

    Expects a JSON body ``{"embedding1": [...], "embedding2": [...]}``.

    Returns:
        200 with ``{"similarity_score": {...}}`` as produced by
        ``get_all_similarities``, 400 if either embedding is missing, 500 with
        ``{"error": ...}`` on failure.
    """
    try:
        payload = request.get_json(silent=True) or {}
        embedding1 = payload.get("embedding1")
        embedding2 = payload.get("embedding2")
        if embedding1 is None or embedding2 is None:
            return jsonify({"error": "'embedding1' and 'embedding2' are required"}), 400

        similarity_score = get_all_similarities(embedding1, embedding2)
        return jsonify({"similarity_score": similarity_score}), 200

    except Exception as e:
        return jsonify({"error": str(e)}), 500
341
+
342
@app.route('/')
def hello():
    """Answer requests to the root path with a fixed greeting."""
    greeting = 'Hello, World!'
    return greeting
345
+
346
# Start Flask's built-in server only when this file is executed directly.
# The Dockerfile serves the app with gunicorn ("app:app"), which imports this
# module; an unguarded app.run() would block that import in the dev server.
if __name__ == "__main__":
    app.run()
requirements.txt ADDED
@@ -0,0 +1,134 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ absl-py==2.1.0
2
+ aiohttp==3.9.3
3
+ aiosignal==1.3.1
4
+ annotated-types==0.6.0
5
+ anyio==4.3.0
6
+ asgiref==3.7.2
7
+ astunparse==1.6.3
8
+ attrs==23.2.0
9
+ audioread==3.0.1
10
+ beautifulsoup4==4.12.3
11
+ blinker==1.7.0
12
+ cachelib==0.9.0
13
+ certifi==2024.2.2
14
+ cffi==1.16.0
15
+ charset-normalizer==3.3.2
16
+ click==8.1.7
17
+ colorama==0.4.6
18
+ decorator==4.4.2
19
+ distro==1.9.0
20
+ Django==5.0.1
21
+ django-cors-headers==4.3.1
22
+ django-restframework==0.0.1
23
+ djangorestframework==3.14.0
24
+ dlib==19.24.2
25
+ et-xmlfile==1.1.0
26
+ face-recognition==1.3.0
27
+ face_recognition_models==0.3.0
28
+ filelock==3.13.3
29
+ Flask==3.0.2
30
+ Flask-Caching==2.1.0
31
+ Flask-Cors==4.0.0
32
+ flatbuffers==24.3.25
33
+ frozenlist==1.4.1
34
+ fsspec==2024.3.1
35
+ gast==0.5.4
36
+ gensim==4.3.2
37
+ google-pasta==0.2.0
38
+ grpcio==1.62.1
39
+ h11==0.14.0
40
+ h5py==3.10.0
41
+ httpcore==1.0.4
42
+ httpx==0.27.0
43
+ huggingface-hub==0.22.2
44
+ idna==3.6
45
+ imageio==2.34.0
46
+ imageio-ffmpeg==0.4.9
47
+ itsdangerous==2.1.2
48
+ Jinja2==3.1.3
49
+ joblib==1.3.2
50
+ keras==3.1.1
51
+ lazy_loader==0.3
52
+ libclang==18.1.1
53
+ librosa==0.10.1
54
+ llvmlite==0.42.0
55
+ Markdown==3.6
56
+ markdown-it-py==3.0.0
57
+ MarkupSafe==2.1.5
58
+ mdurl==0.1.2
59
+ ml-dtypes==0.3.2
60
+ moviepy==1.0.3
61
+ mpmath==1.3.0
62
+ msgpack==1.0.8
63
+ multidict==6.0.5
64
+ namex==0.0.7
65
+ networkx==3.2.1
66
+ numba==0.59.1
67
+ numpy==1.26.3
68
+ openai==0.28.0
69
+ opencv-python==4.9.0.80
70
+ openpyxl==3.1.2
71
+ opt-einsum==3.3.0
72
+ optree==0.11.0
73
+ outcome==1.3.0.post0
74
+ packaging==24.0
75
+ pandas==2.2.0
76
+ pillow==10.3.0
77
+ platformdirs==4.2.0
78
+ pooch==1.8.1
79
+ proglog==0.1.10
80
+ protobuf==4.25.3
81
+ pycparser==2.22
82
+ pydantic==2.6.3
83
+ pydantic_core==2.16.3
84
+ pydub==0.25.1
85
+ Pygments==2.17.2
86
+ PyMuPDF==1.24.0
87
+ PyMuPDFb==1.24.0
88
+ PySocks==1.7.1
89
+ python-dateutil==2.8.2
90
+ python-dotenv==1.0.1
91
+ pytz==2023.4
92
+ PyYAML==6.0.1
93
+ regex==2023.12.25
94
+ requests==2.31.0
95
+ rich==13.7.1
96
+ safetensors==0.4.2
97
+ scikit-learn==1.4.1.post1
98
+ scipy==1.12.0
99
+ selenium==4.19.0
100
+ setuptools==69.2.0
101
+ six==1.16.0
102
+ smart-open==7.0.4
103
+ sniffio==1.3.1
104
+ sortedcontainers==2.4.0
105
+ soundfile==0.12.1
106
+ soupsieve==2.5
107
+ soxr==0.3.7
108
+ SpeechRecognition==3.10.1
109
+ sqlparse==0.4.4
110
+ sympy==1.12
111
+ tensorboard==2.16.2
112
+ tensorboard-data-server==0.7.2
113
+ tensorflow==2.16.1
114
+ tensorflow-intel==2.16.1
115
+ termcolor==2.4.0
116
+ tf_keras==2.16.0
117
+ threadpoolctl==3.4.0
118
+ timm==0.9.16
119
+ tokenizers==0.15.2
120
+ torch==2.2.2
121
+ torchvision==0.17.2
122
+ tqdm==4.66.2
123
+ transformers==4.39.2
124
+ trio==0.25.0
125
+ trio-websocket==0.11.1
126
+ typing_extensions==4.10.0
127
+ tzdata==2023.4
128
+ urllib3==2.2.1
129
+ webdriver-manager==4.0.1
130
+ Werkzeug==3.0.1
131
+ wheel==0.43.0
132
+ wrapt==1.16.0
133
+ wsproto==1.2.0
134
+ yarl==1.9.4
utils/ImageAndTextEmbedding/index.py ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from PIL import Image
2
+ import io
3
+ from transformers import AutoTokenizer, CLIPProcessor, CLIPModel
4
+ import torch
5
+
6
+ # Load CLIP model and processor
7
+ model_name = "openai/clip-vit-base-patch32"
8
+ loaded_model = CLIPModel.from_pretrained(model_name)
9
+ loaded_processor = CLIPProcessor.from_pretrained(model_name)
10
+
11
def getTextEmbedding(text):
    """Return the CLIP text embedding of a single string.

    Args:
        text: The text to embed.

    Returns:
        A numpy array holding the text features produced by
        ``loaded_model.get_text_features`` with singleton dimensions removed.
    """
    # truncation=True cuts over-long text down to the model's maximum
    # sequence length instead of failing inside the model.
    inputs_text = loaded_processor(text=[text], return_tensors="pt", padding=True, truncation=True)

    # Inference only: no gradients are needed.
    with torch.no_grad():
        text_features = loaded_model.get_text_features(input_ids=inputs_text.input_ids, attention_mask=inputs_text.attention_mask)

    return text_features.squeeze().numpy()
25
+
26
def getImageEmbedding(binary_image_data):
    """Return the CLIP image embedding of an encoded image.

    Args:
        binary_image_data: Raw bytes of an image readable by ``PIL.Image.open``.

    Returns:
        A numpy array holding the image features produced by
        ``loaded_model.get_image_features`` with singleton dimensions removed.
    """
    picture = Image.open(io.BytesIO(binary_image_data))
    model_inputs = loaded_processor(images=picture, return_tensors="pt", padding=True)

    # Inference only: no gradients are needed.
    with torch.no_grad():
        features = loaded_model.get_image_features(pixel_values=model_inputs.pixel_values)

    return features.squeeze().numpy()
40
+
utils/audioEmbedding/index.py ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import tensorflow as tf
2
+ import numpy as np
3
+ import librosa
4
+ import pickle
5
+ import io
6
+
7
+ # Load the YAMNet model from the SavedModel format
8
+ yamnet_model = tf.saved_model.load('yamnet_saved_model')
9
+
10
+ # Function to extract embeddings from audio file using YAMNet
11
def extract_audio_embeddings(audio_binary):
    """Compute YAMNet embeddings for an encoded audio clip.

    Args:
        audio_binary: Raw bytes of an audio file that ``librosa.load`` can read.

    Returns:
        The embeddings returned by the YAMNet model, converted to nested
        Python lists.
    """
    # Resample to 16 kHz while loading, the rate the model is fed here.
    waveform, _sample_rate = librosa.load(io.BytesIO(audio_binary), sr=16000)
    waveform_tensor = tf.convert_to_tensor(waveform, dtype=tf.float32)

    # The model returns (scores, embeddings, spectrogram); only the
    # embeddings are used.
    _scores, embeddings, _spectrogram = yamnet_model(waveform_tensor)
    return embeddings.numpy().tolist()
20
+
21
+ # Example usage
22
if __name__ == "__main__":
    image_audio_path = "pictures/users/1a.mp3"
    # extract_audio_embeddings expects the raw bytes of the file, not its
    # path, so read the file first.
    with open(image_audio_path, "rb") as audio_file:
        image_audio_embeddings = extract_audio_embeddings(audio_file.read())
    print("Embeddings for", image_audio_path)
    print(image_audio_embeddings)
28
+ print("audio embedding model loaded succesfully")
utils/imageEmbedding/index.py ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pickle
2
+ from torchvision import transforms
3
+ from PIL import Image
4
+ import torch
5
+ import io
6
+ from utils.ImageAndTextEmbedding.index import getImageEmbedding
7
+
8
def get_image_embedding(image_bytes):
    """Return the embedding of an encoded image.

    Thin wrapper around ``getImageEmbedding``; the leftover debug print that
    ran on every call has been removed.

    Args:
        image_bytes: Raw bytes of the image.

    Returns:
        Whatever ``getImageEmbedding`` returns for the image.
    """
    return getImageEmbedding(image_bytes)
11
+
12
+ # Example: Load image data from file and get its embedding
13
+ # image_data = open("pictures/users/2.jpg", "rb").read()
14
+ # embedding = get_image_embedding(image_data)
15
+ # print(embedding)
16
+
17
+ print("Image embedding model loaded successfully!")
utils/imageToText/index.py ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pickle
2
+ import re
3
+ from PIL import Image
4
+ from transformers import pipeline
5
+ import io
6
+
7
def clean_text(text):
    """Strip markup tags and normalise whitespace in a string.

    Anything of the form ``<...>`` is removed, leading and trailing whitespace
    is trimmed, and every remaining run of whitespace becomes a single space.

    Args:
        text: The raw string to clean.

    Returns:
        The cleaned string.
    """
    # A distinct local name avoids shadowing the function itself.
    without_tags = re.sub(r'<[^>]+>', '', text)
    return re.sub(r'\s+', ' ', without_tags.strip())
12
+
13
+ pipe = pipeline("image-to-text", model="jinhybr/OCR-Donut-CORD")
14
+
15
def extract_text(binary_image):
    """Run the OCR pipeline on an encoded image and return the cleaned text.

    Args:
        binary_image: Raw bytes of an image readable by ``PIL.Image.open``.

    Returns:
        The ``generated_text`` of the first pipeline result after
        ``clean_text`` has been applied.
    """
    predictions = pipe(Image.open(io.BytesIO(binary_image)))
    raw_text = predictions[0]['generated_text']
    return clean_text(raw_text)
21
+
22
+ # print(extract_text(open("pictures/users/2.jpg", "rb").read()))
23
+
24
+ print("OCR pipeline loaded successfully!")
utils/objectDetection/index.py ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from transformers import pipeline
2
+ from PIL import Image
3
+ from io import BytesIO
4
+
5
+ # Load the object detection pipeline
6
+ object_detection_pipeline = pipeline("object-detection", model="ciasimbaya/ObjectDetection")
7
def detect_objects(image_bytes):
    """Run the object-detection pipeline on an encoded image.

    Args:
        image_bytes: Raw bytes of an image readable by ``PIL.Image.open``.

    Returns:
        The pipeline output for the image, unchanged.
    """
    picture = Image.open(BytesIO(image_bytes))
    return object_detection_pipeline(picture)
11
+
12
+ print("object detection model loaded succesfully")
utils/sample.py ADDED
@@ -0,0 +1,77 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import requests
2
+
3
+ # Define the image URL
4
+ image_url = "https://utfs.io/f/47589c6c-6ce0-4baf-b75d-b1ec5d4d9dda-213j1w.jpg"
5
+ audio_url = "https://utfs.io/f/b84a84a2-b68f-49c5-8b7c-d76d894f6d3a-c5qjj4.wav"
6
+ video_url = "https://utfs.io/f/ef6c037f-fa61-471a-8956-562bc2d62531-fzxs1i.mp4"
7
+ family_url = "https://i.pinimg.com/originals/b2/20/14/b22014ca275e94097386aab222469caf.jpg"
8
+
9
+
10
+ # Define the URLs of the three nodes
11
+ extract_text_url = "http://127.0.0.1:5000/extractText"
12
+ extract_audio_text_url = "http://127.0.0.1:5000/extractAudioText"
13
+ get_image_embedding_url = "http://127.0.0.1:5000/getImageEmbedding"
14
+ get_text_embedding_url = "http://127.0.0.1:5000/getTextEmbedding"
15
+ get_text_description_embedding_url = "http://127.0.0.1:5000/getTextDescriptionEmbedding"
16
+ get_audio_embedding_url = "http://127.0.0.1:5000/getAudioEmbedding"
17
+ get_audio_extracted_text_url = "http://127.0.0.1:5000/getAudioExtractedText"
18
+ get_video_embedding_url = "http://127.0.0.1:5000/getVideoEmbedding"
19
+ get_object_detection_url = "http://127.0.0.1:5000/detectObjects"
20
+ get_similarity_score_url = "http://127.0.0.1:5000/getSimilarityScore"
21
+ get_face_locations_url = "http://127.0.0.1:5000/getFaceLocations"
22
+
23
+ # Make requests to each node with the image URL
24
# Smoke-test script: call the enabled endpoints and print a summary of each
# response. The commented-out probes are kept for manual use.
# NOTE(review): the endpoint URLs and the "transcription" key used below are
# assumed to match the server this script targets -- confirm before relying
# on the output.
try:
    # Named `results` so the builtin `list` is not shadowed.
    results = []

    response_text = requests.post(extract_audio_text_url, json={"audio_url": audio_url})
    extracted_text = response_text.json()["transcription"]
    results.append({"length of text": len(extracted_text)})

    # # Request to extract text
    # response_text = requests.post(extract_text_url, json={"imageUrl": image_url})
    # extracted_text = response_text.json().get("extracted_text")
    # results.append({"length of text":len(extracted_text)})

    # # Request to get image embedding
    # response_image_embedding = requests.post(get_image_embedding_url, json={"imageUrl": image_url})
    # image_embedding = response_image_embedding.json().get("image_embedding")
    # results.append({"length of image_embedding":len(image_embedding)})

    # # Request to get text embedding
    # response_text_embedding = requests.post(get_text_embedding_url, json={"text": extracted_text})
    # text_embedding = response_text_embedding.json().get("text_embedding")
    # results.append({"length of text_embedding":len(text_embedding)})

    # # Request to get text description embedding
    # response_text_description_embedding = requests.post(get_text_description_embedding_url, json={"text": "a image of mobile phone"})
    # text_description_embedding = response_text_description_embedding.json().get("text_description_embedding")
    # results.append({"length of text_description_embedding":len(text_description_embedding)})

    # # Request to get audio embedding
    # response_audio_embedding = requests.post(get_audio_embedding_url, json={"audioUrl": audio_url})
    # audio_embedding = response_audio_embedding.json().get("audio_embedding")
    # results.append({"length of audio_embedding":len(audio_embedding)})

    # Request to get video embedding
    response_video_embedding = requests.post(get_video_embedding_url, json={"videoUrl": video_url})
    video_embedding = response_video_embedding.json().get("video_embedding")
    # Record the length, as the label says, rather than the whole embedding.
    results.append({"length of video_embedding": len(video_embedding)})

    # # Request to get object detection
    # response_object_detection = requests.post(get_object_detection_url, json={"imageUrl": image_url})
    # object_detection = response_object_detection.json().get("object_detection_results")
    # results.append({"length of object_detection":len(object_detection)})

    # # Request to get similarity score
    # response_similarity_score = requests.post(get_similarity_score_url, json={"embedding1": text_description_embedding, "embedding2": image_embedding})
    # similarity_score = response_similarity_score.json().get("similarity_score")
    # results.append({"similarity_score":similarity_score})

    # # Request to get face locations
    # response_face_locations = requests.post(get_face_locations_url, json={"imageUrl": family_url})
    # face_locations = response_face_locations.json().get("face_locations")
    # results.append({"face_locations":face_locations})
    print(results)
except Exception as e:
    print("Error:", e)
utils/sentanceEmbedding/index.py ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pickle
2
+ from utils.ImageAndTextEmbedding.index import getTextEmbedding
3
+
4
+ with open("word2vec_model.pkl", "rb") as f:
5
+ textEmbedding_model = pickle.load(f)
6
+
7
def get_text_vector(example_text):
    """Average the word vectors of the in-vocabulary words of a text.

    The text is lower-cased and split on whitespace; words the embedding
    model does not contain are ignored.

    Args:
        example_text: The text to embed.

    Returns:
        The mean vector as a list, or ``None`` when no word of the text is in
        the model's vocabulary.
    """
    known_words = []
    for token in example_text.lower().split():
        if token in textEmbedding_model:
            known_words.append(token)

    if not known_words:
        return None

    vector_sum = sum(textEmbedding_model[token] for token in known_words)
    mean_vector = vector_sum / len(known_words)
    return mean_vector.tolist()
20
+
21
def get_text_discription_vector(text):
    """Return the embedding of a description by delegating to ``getTextEmbedding``."""
    description_embedding = getTextEmbedding(text)
    return description_embedding
23
+
24
+ # Example usage:
25
+ # example_text = "This is an example sentence."
26
+ # text_vector = get_text_vector(example_text)
27
+ # if text_vector:
28
+ # print("Vector representation of the example text:", text_vector)
29
+ # else:
30
+ # print("None of the words in the example text are in the vocabulary of the Word2Vec model.")
31
+
32
+ print("Text embedding model loaded successfully!")
utils/similarityScore.py ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+
3
def euclidean_similarity(embedding1, embedding2):
    """Map the Euclidean distance between two embeddings to a similarity.

    Args:
        embedding1: First embedding (array-like).
        embedding2: Second embedding (array-like).

    Returns:
        ``1 / (1 + distance)``: 1 for identical embeddings, approaching 0 as
        they move apart.
    """
    difference = np.array(embedding1) - np.array(embedding2)
    distance = np.linalg.norm(difference)
    return 1 / (1 + distance)
10
+
11
def cosine_similarity(embedding1, embedding2):
    """Return the cosine similarity of two embeddings.

    Args:
        embedding1: First embedding (array-like of numbers).
        embedding2: Second embedding (array-like of numbers).

    Returns:
        The dot product divided by the product of the norms. ``0.0`` is
        returned when either embedding has zero norm, where the cosine is
        undefined and the plain division would produce NaN.
    """
    vector1 = np.asarray(embedding1, dtype=float)
    vector2 = np.asarray(embedding2, dtype=float)

    denominator = np.linalg.norm(vector1) * np.linalg.norm(vector2)
    if denominator == 0:
        return 0.0

    return np.dot(vector1, vector2) / denominator
17
+
18
def jaccard_similarity(embedding1, embedding2):
    """Return the Jaccard index of the value sets of two embeddings.

    Args:
        embedding1: First embedding; its elements must be hashable.
        embedding2: Second embedding; its elements must be hashable.

    Returns:
        ``|A ∩ B| / |A ∪ B|`` for the sets of distinct values. Two empty
        inputs are treated as identical and give ``1.0`` instead of raising
        ``ZeroDivisionError``.
    """
    values1 = set(embedding1)
    values2 = set(embedding2)

    union = values1 | values2
    if not union:
        return 1.0

    return len(values1 & values2) / len(union)
22
+
23
def hamming_similarity(embedding1, embedding2):
    """Return the fraction of positions at which two embeddings agree.

    The inputs are converted to numpy arrays first. Comparing two plain
    Python lists with ``!=`` yields a single boolean, so without the
    conversion the distance would only ever be 0 or 1.

    Args:
        embedding1: First embedding (array-like).
        embedding2: Second embedding (array-like) of the same shape.

    Returns:
        ``1 - differing_positions / total_positions``; ``1.0`` for two empty
        embeddings.

    Raises:
        ValueError: If the embeddings do not have the same shape.
    """
    vector1 = np.asarray(embedding1)
    vector2 = np.asarray(embedding2)

    if vector1.shape != vector2.shape:
        raise ValueError("Embeddings must have the same shape")
    if vector1.size == 0:
        return 1.0

    distance = np.count_nonzero(vector1 != vector2)
    return 1 - distance / vector1.size
27
+
28
def get_all_similarities(embedding1, embedding2):
    """Compute every supported similarity metric for a pair of embeddings.

    Args:
        embedding1: First embedding.
        embedding2: Second embedding.

    Returns:
        A dict with the keys ``"euclidean"``, ``"cosine"``, ``"jaccard"`` and
        ``"hamming"``, each mapped to the value of the matching function.
    """
    metrics = (
        ("euclidean", euclidean_similarity),
        ("cosine", cosine_similarity),
        ("jaccard", jaccard_similarity),
        ("hamming", hamming_similarity),
    )
    return {label: metric(embedding1, embedding2) for label, metric in metrics}
34
+
35
+ # Example usage:
36
+ # embedding1 = [1, 2, 3]
37
+ # embedding2 = [4, 5, 6]
38
+ # similarities = get_all_similarities(embedding1, embedding2)
39
+ # print(similarities)
40
+
41
+ print("Similarity score is working")
utils/videoEmbedding/index.py ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import cv2
2
+ import numpy as np
3
+ from utils.imageEmbedding.index import get_image_embedding
4
+ from utils.imageToText.index import extract_text
5
+ import requests
6
+
7
+
8
def get_video_embedding(video_url):
    """Embed roughly one frame per second of a video via the image endpoint.

    Sampled frames are JPEG-encoded and posted to the remote
    ``extract_image_text_and_embedding_binary_data`` endpoint. Frames that
    cannot be encoded, or that the endpoint rejects, are skipped.

    Args:
        video_url: Location of the video, passed to ``cv2.VideoCapture``.

    Returns:
        A list of ``{"image_embedding": ..., "extracted_text": ...}`` dicts,
        one per successfully processed frame, or ``None`` if an unexpected
        error occurs (the error is printed).
    """
    cap = None
    try:
        cap = cv2.VideoCapture(video_url)
        fps = cap.get(cv2.CAP_PROP_FPS)
        # Sample one frame per second. Fall back to every frame when the
        # reported rate is below 1, which would otherwise cause a
        # modulo-by-zero.
        interval = int(fps) if fps >= 1 else 1

        frame_index = -1
        video_embeddings = []

        while cap.isOpened():
            grabbed, frame = cap.read()
            if not grabbed:
                break

            # Count every frame read, including ones skipped below, so the
            # sampling interval stays aligned after a failure.
            frame_index += 1
            if frame_index % interval != 0:
                continue

            encoded, buffer = cv2.imencode('.jpg', frame)
            if not encoded:
                continue
            frame_bytes = buffer.tobytes()

            response = requests.post('https://imageprocessing-backend.hf.space/extract_image_text_and_embedding_binary_data', data=frame_bytes)
            if response.status_code != 200:
                # Log the frame index and status only, not the raw JPEG bytes.
                print(f"Failed to process frame {frame_index}: HTTP {response.status_code}")
                continue

            result = response.json()
            video_embeddings.append({
                "image_embedding": result.get("image_embedding"),
                "extracted_text": result.get("extracted_text"),
            })

        return video_embeddings

    except Exception as e:
        print(e)
        return None
    finally:
        # Release the capture on the error path as well.
        if cap is not None:
            cap.release()
48
+
49
+
50
+ # Example usage:
51
+ # video_url = "https://utfs.io/f/ef6c037f-fa61-471a-8956-562bc2d62531-fzxs1i.mp4"
52
+ # video_embeddings = get_video_embedding(video_url)
53
+ # print("Video Embeddings:", video_embeddings)
word2vec_model.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fa69b6e92ca17e1d8c76c072b75b4c5458f1e5ff1a882962549a3d7141c85e6f
3
+ size 3704150289
yamnet_saved_model/assets/yamnet_class_map.csv ADDED
@@ -0,0 +1,522 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ index,mid,display_name
2
+ 0,/m/09x0r,Speech
3
+ 1,/m/0ytgt,"Child speech, kid speaking"
4
+ 2,/m/01h8n0,Conversation
5
+ 3,/m/02qldy,"Narration, monologue"
6
+ 4,/m/0261r1,Babbling
7
+ 5,/m/0brhx,Speech synthesizer
8
+ 6,/m/07p6fty,Shout
9
+ 7,/m/07q4ntr,Bellow
10
+ 8,/m/07rwj3x,Whoop
11
+ 9,/m/07sr1lc,Yell
12
+ 10,/t/dd00135,Children shouting
13
+ 11,/m/03qc9zr,Screaming
14
+ 12,/m/02rtxlg,Whispering
15
+ 13,/m/01j3sz,Laughter
16
+ 14,/t/dd00001,Baby laughter
17
+ 15,/m/07r660_,Giggle
18
+ 16,/m/07s04w4,Snicker
19
+ 17,/m/07sq110,Belly laugh
20
+ 18,/m/07rgt08,"Chuckle, chortle"
21
+ 19,/m/0463cq4,"Crying, sobbing"
22
+ 20,/t/dd00002,"Baby cry, infant cry"
23
+ 21,/m/07qz6j3,Whimper
24
+ 22,/m/07qw_06,"Wail, moan"
25
+ 23,/m/07plz5l,Sigh
26
+ 24,/m/015lz1,Singing
27
+ 25,/m/0l14jd,Choir
28
+ 26,/m/01swy6,Yodeling
29
+ 27,/m/02bk07,Chant
30
+ 28,/m/01c194,Mantra
31
+ 29,/t/dd00005,Child singing
32
+ 30,/t/dd00006,Synthetic singing
33
+ 31,/m/06bxc,Rapping
34
+ 32,/m/02fxyj,Humming
35
+ 33,/m/07s2xch,Groan
36
+ 34,/m/07r4k75,Grunt
37
+ 35,/m/01w250,Whistling
38
+ 36,/m/0lyf6,Breathing
39
+ 37,/m/07mzm6,Wheeze
40
+ 38,/m/01d3sd,Snoring
41
+ 39,/m/07s0dtb,Gasp
42
+ 40,/m/07pyy8b,Pant
43
+ 41,/m/07q0yl5,Snort
44
+ 42,/m/01b_21,Cough
45
+ 43,/m/0dl9sf8,Throat clearing
46
+ 44,/m/01hsr_,Sneeze
47
+ 45,/m/07ppn3j,Sniff
48
+ 46,/m/06h7j,Run
49
+ 47,/m/07qv_x_,Shuffle
50
+ 48,/m/07pbtc8,"Walk, footsteps"
51
+ 49,/m/03cczk,"Chewing, mastication"
52
+ 50,/m/07pdhp0,Biting
53
+ 51,/m/0939n_,Gargling
54
+ 52,/m/01g90h,Stomach rumble
55
+ 53,/m/03q5_w,"Burping, eructation"
56
+ 54,/m/02p3nc,Hiccup
57
+ 55,/m/02_nn,Fart
58
+ 56,/m/0k65p,Hands
59
+ 57,/m/025_jnm,Finger snapping
60
+ 58,/m/0l15bq,Clapping
61
+ 59,/m/01jg02,"Heart sounds, heartbeat"
62
+ 60,/m/01jg1z,Heart murmur
63
+ 61,/m/053hz1,Cheering
64
+ 62,/m/028ght,Applause
65
+ 63,/m/07rkbfh,Chatter
66
+ 64,/m/03qtwd,Crowd
67
+ 65,/m/07qfr4h,"Hubbub, speech noise, speech babble"
68
+ 66,/t/dd00013,Children playing
69
+ 67,/m/0jbk,Animal
70
+ 68,/m/068hy,"Domestic animals, pets"
71
+ 69,/m/0bt9lr,Dog
72
+ 70,/m/05tny_,Bark
73
+ 71,/m/07r_k2n,Yip
74
+ 72,/m/07qf0zm,Howl
75
+ 73,/m/07rc7d9,Bow-wow
76
+ 74,/m/0ghcn6,Growling
77
+ 75,/t/dd00136,Whimper (dog)
78
+ 76,/m/01yrx,Cat
79
+ 77,/m/02yds9,Purr
80
+ 78,/m/07qrkrw,Meow
81
+ 79,/m/07rjwbb,Hiss
82
+ 80,/m/07r81j2,Caterwaul
83
+ 81,/m/0ch8v,"Livestock, farm animals, working animals"
84
+ 82,/m/03k3r,Horse
85
+ 83,/m/07rv9rh,Clip-clop
86
+ 84,/m/07q5rw0,"Neigh, whinny"
87
+ 85,/m/01xq0k1,"Cattle, bovinae"
88
+ 86,/m/07rpkh9,Moo
89
+ 87,/m/0239kh,Cowbell
90
+ 88,/m/068zj,Pig
91
+ 89,/t/dd00018,Oink
92
+ 90,/m/03fwl,Goat
93
+ 91,/m/07q0h5t,Bleat
94
+ 92,/m/07bgp,Sheep
95
+ 93,/m/025rv6n,Fowl
96
+ 94,/m/09b5t,"Chicken, rooster"
97
+ 95,/m/07st89h,Cluck
98
+ 96,/m/07qn5dc,"Crowing, cock-a-doodle-doo"
99
+ 97,/m/01rd7k,Turkey
100
+ 98,/m/07svc2k,Gobble
101
+ 99,/m/09ddx,Duck
102
+ 100,/m/07qdb04,Quack
103
+ 101,/m/0dbvp,Goose
104
+ 102,/m/07qwf61,Honk
105
+ 103,/m/01280g,Wild animals
106
+ 104,/m/0cdnk,"Roaring cats (lions, tigers)"
107
+ 105,/m/04cvmfc,Roar
108
+ 106,/m/015p6,Bird
109
+ 107,/m/020bb7,"Bird vocalization, bird call, bird song"
110
+ 108,/m/07pggtn,"Chirp, tweet"
111
+ 109,/m/07sx8x_,Squawk
112
+ 110,/m/0h0rv,"Pigeon, dove"
113
+ 111,/m/07r_25d,Coo
114
+ 112,/m/04s8yn,Crow
115
+ 113,/m/07r5c2p,Caw
116
+ 114,/m/09d5_,Owl
117
+ 115,/m/07r_80w,Hoot
118
+ 116,/m/05_wcq,"Bird flight, flapping wings"
119
+ 117,/m/01z5f,"Canidae, dogs, wolves"
120
+ 118,/m/06hps,"Rodents, rats, mice"
121
+ 119,/m/04rmv,Mouse
122
+ 120,/m/07r4gkf,Patter
123
+ 121,/m/03vt0,Insect
124
+ 122,/m/09xqv,Cricket
125
+ 123,/m/09f96,Mosquito
126
+ 124,/m/0h2mp,"Fly, housefly"
127
+ 125,/m/07pjwq1,Buzz
128
+ 126,/m/01h3n,"Bee, wasp, etc."
129
+ 127,/m/09ld4,Frog
130
+ 128,/m/07st88b,Croak
131
+ 129,/m/078jl,Snake
132
+ 130,/m/07qn4z3,Rattle
133
+ 131,/m/032n05,Whale vocalization
134
+ 132,/m/04rlf,Music
135
+ 133,/m/04szw,Musical instrument
136
+ 134,/m/0fx80y,Plucked string instrument
137
+ 135,/m/0342h,Guitar
138
+ 136,/m/02sgy,Electric guitar
139
+ 137,/m/018vs,Bass guitar
140
+ 138,/m/042v_gx,Acoustic guitar
141
+ 139,/m/06w87,"Steel guitar, slide guitar"
142
+ 140,/m/01glhc,Tapping (guitar technique)
143
+ 141,/m/07s0s5r,Strum
144
+ 142,/m/018j2,Banjo
145
+ 143,/m/0jtg0,Sitar
146
+ 144,/m/04rzd,Mandolin
147
+ 145,/m/01bns_,Zither
148
+ 146,/m/07xzm,Ukulele
149
+ 147,/m/05148p4,Keyboard (musical)
150
+ 148,/m/05r5c,Piano
151
+ 149,/m/01s0ps,Electric piano
152
+ 150,/m/013y1f,Organ
153
+ 151,/m/03xq_f,Electronic organ
154
+ 152,/m/03gvt,Hammond organ
155
+ 153,/m/0l14qv,Synthesizer
156
+ 154,/m/01v1d8,Sampler
157
+ 155,/m/03q5t,Harpsichord
158
+ 156,/m/0l14md,Percussion
159
+ 157,/m/02hnl,Drum kit
160
+ 158,/m/0cfdd,Drum machine
161
+ 159,/m/026t6,Drum
162
+ 160,/m/06rvn,Snare drum
163
+ 161,/m/03t3fj,Rimshot
164
+ 162,/m/02k_mr,Drum roll
165
+ 163,/m/0bm02,Bass drum
166
+ 164,/m/011k_j,Timpani
167
+ 165,/m/01p970,Tabla
168
+ 166,/m/01qbl,Cymbal
169
+ 167,/m/03qtq,Hi-hat
170
+ 168,/m/01sm1g,Wood block
171
+ 169,/m/07brj,Tambourine
172
+ 170,/m/05r5wn,Rattle (instrument)
173
+ 171,/m/0xzly,Maraca
174
+ 172,/m/0mbct,Gong
175
+ 173,/m/016622,Tubular bells
176
+ 174,/m/0j45pbj,Mallet percussion
177
+ 175,/m/0dwsp,"Marimba, xylophone"
178
+ 176,/m/0dwtp,Glockenspiel
179
+ 177,/m/0dwt5,Vibraphone
180
+ 178,/m/0l156b,Steelpan
181
+ 179,/m/05pd6,Orchestra
182
+ 180,/m/01kcd,Brass instrument
183
+ 181,/m/0319l,French horn
184
+ 182,/m/07gql,Trumpet
185
+ 183,/m/07c6l,Trombone
186
+ 184,/m/0l14_3,Bowed string instrument
187
+ 185,/m/02qmj0d,String section
188
+ 186,/m/07y_7,"Violin, fiddle"
189
+ 187,/m/0d8_n,Pizzicato
190
+ 188,/m/01xqw,Cello
191
+ 189,/m/02fsn,Double bass
192
+ 190,/m/085jw,"Wind instrument, woodwind instrument"
193
+ 191,/m/0l14j_,Flute
194
+ 192,/m/06ncr,Saxophone
195
+ 193,/m/01wy6,Clarinet
196
+ 194,/m/03m5k,Harp
197
+ 195,/m/0395lw,Bell
198
+ 196,/m/03w41f,Church bell
199
+ 197,/m/027m70_,Jingle bell
200
+ 198,/m/0gy1t2s,Bicycle bell
201
+ 199,/m/07n_g,Tuning fork
202
+ 200,/m/0f8s22,Chime
203
+ 201,/m/026fgl,Wind chime
204
+ 202,/m/0150b9,Change ringing (campanology)
205
+ 203,/m/03qjg,Harmonica
206
+ 204,/m/0mkg,Accordion
207
+ 205,/m/0192l,Bagpipes
208
+ 206,/m/02bxd,Didgeridoo
209
+ 207,/m/0l14l2,Shofar
210
+ 208,/m/07kc_,Theremin
211
+ 209,/m/0l14t7,Singing bowl
212
+ 210,/m/01hgjl,Scratching (performance technique)
213
+ 211,/m/064t9,Pop music
214
+ 212,/m/0glt670,Hip hop music
215
+ 213,/m/02cz_7,Beatboxing
216
+ 214,/m/06by7,Rock music
217
+ 215,/m/03lty,Heavy metal
218
+ 216,/m/05r6t,Punk rock
219
+ 217,/m/0dls3,Grunge
220
+ 218,/m/0dl5d,Progressive rock
221
+ 219,/m/07sbbz2,Rock and roll
222
+ 220,/m/05w3f,Psychedelic rock
223
+ 221,/m/06j6l,Rhythm and blues
224
+ 222,/m/0gywn,Soul music
225
+ 223,/m/06cqb,Reggae
226
+ 224,/m/01lyv,Country
227
+ 225,/m/015y_n,Swing music
228
+ 226,/m/0gg8l,Bluegrass
229
+ 227,/m/02x8m,Funk
230
+ 228,/m/02w4v,Folk music
231
+ 229,/m/06j64v,Middle Eastern music
232
+ 230,/m/03_d0,Jazz
233
+ 231,/m/026z9,Disco
234
+ 232,/m/0ggq0m,Classical music
235
+ 233,/m/05lls,Opera
236
+ 234,/m/02lkt,Electronic music
237
+ 235,/m/03mb9,House music
238
+ 236,/m/07gxw,Techno
239
+ 237,/m/07s72n,Dubstep
240
+ 238,/m/0283d,Drum and bass
241
+ 239,/m/0m0jc,Electronica
242
+ 240,/m/08cyft,Electronic dance music
243
+ 241,/m/0fd3y,Ambient music
244
+ 242,/m/07lnk,Trance music
245
+ 243,/m/0g293,Music of Latin America
246
+ 244,/m/0ln16,Salsa music
247
+ 245,/m/0326g,Flamenco
248
+ 246,/m/0155w,Blues
249
+ 247,/m/05fw6t,Music for children
250
+ 248,/m/02v2lh,New-age music
251
+ 249,/m/0y4f8,Vocal music
252
+ 250,/m/0z9c,A capella
253
+ 251,/m/0164x2,Music of Africa
254
+ 252,/m/0145m,Afrobeat
255
+ 253,/m/02mscn,Christian music
256
+ 254,/m/016cjb,Gospel music
257
+ 255,/m/028sqc,Music of Asia
258
+ 256,/m/015vgc,Carnatic music
259
+ 257,/m/0dq0md,Music of Bollywood
260
+ 258,/m/06rqw,Ska
261
+ 259,/m/02p0sh1,Traditional music
262
+ 260,/m/05rwpb,Independent music
263
+ 261,/m/074ft,Song
264
+ 262,/m/025td0t,Background music
265
+ 263,/m/02cjck,Theme music
266
+ 264,/m/03r5q_,Jingle (music)
267
+ 265,/m/0l14gg,Soundtrack music
268
+ 266,/m/07pkxdp,Lullaby
269
+ 267,/m/01z7dr,Video game music
270
+ 268,/m/0140xf,Christmas music
271
+ 269,/m/0ggx5q,Dance music
272
+ 270,/m/04wptg,Wedding music
273
+ 271,/t/dd00031,Happy music
274
+ 272,/t/dd00033,Sad music
275
+ 273,/t/dd00034,Tender music
276
+ 274,/t/dd00035,Exciting music
277
+ 275,/t/dd00036,Angry music
278
+ 276,/t/dd00037,Scary music
279
+ 277,/m/03m9d0z,Wind
280
+ 278,/m/09t49,Rustling leaves
281
+ 279,/t/dd00092,Wind noise (microphone)
282
+ 280,/m/0jb2l,Thunderstorm
283
+ 281,/m/0ngt1,Thunder
284
+ 282,/m/0838f,Water
285
+ 283,/m/06mb1,Rain
286
+ 284,/m/07r10fb,Raindrop
287
+ 285,/t/dd00038,Rain on surface
288
+ 286,/m/0j6m2,Stream
289
+ 287,/m/0j2kx,Waterfall
290
+ 288,/m/05kq4,Ocean
291
+ 289,/m/034srq,"Waves, surf"
292
+ 290,/m/06wzb,Steam
293
+ 291,/m/07swgks,Gurgling
294
+ 292,/m/02_41,Fire
295
+ 293,/m/07pzfmf,Crackle
296
+ 294,/m/07yv9,Vehicle
297
+ 295,/m/019jd,"Boat, Water vehicle"
298
+ 296,/m/0hsrw,"Sailboat, sailing ship"
299
+ 297,/m/056ks2,"Rowboat, canoe, kayak"
300
+ 298,/m/02rlv9,"Motorboat, speedboat"
301
+ 299,/m/06q74,Ship
302
+ 300,/m/012f08,Motor vehicle (road)
303
+ 301,/m/0k4j,Car
304
+ 302,/m/0912c9,"Vehicle horn, car horn, honking"
305
+ 303,/m/07qv_d5,Toot
306
+ 304,/m/02mfyn,Car alarm
307
+ 305,/m/04gxbd,"Power windows, electric windows"
308
+ 306,/m/07rknqz,Skidding
309
+ 307,/m/0h9mv,Tire squeal
310
+ 308,/t/dd00134,Car passing by
311
+ 309,/m/0ltv,"Race car, auto racing"
312
+ 310,/m/07r04,Truck
313
+ 311,/m/0gvgw0,Air brake
314
+ 312,/m/05x_td,"Air horn, truck horn"
315
+ 313,/m/02rhddq,Reversing beeps
316
+ 314,/m/03cl9h,"Ice cream truck, ice cream van"
317
+ 315,/m/01bjv,Bus
318
+ 316,/m/03j1ly,Emergency vehicle
319
+ 317,/m/04qvtq,Police car (siren)
320
+ 318,/m/012n7d,Ambulance (siren)
321
+ 319,/m/012ndj,"Fire engine, fire truck (siren)"
322
+ 320,/m/04_sv,Motorcycle
323
+ 321,/m/0btp2,"Traffic noise, roadway noise"
324
+ 322,/m/06d_3,Rail transport
325
+ 323,/m/07jdr,Train
326
+ 324,/m/04zmvq,Train whistle
327
+ 325,/m/0284vy3,Train horn
328
+ 326,/m/01g50p,"Railroad car, train wagon"
329
+ 327,/t/dd00048,Train wheels squealing
330
+ 328,/m/0195fx,"Subway, metro, underground"
331
+ 329,/m/0k5j,Aircraft
332
+ 330,/m/014yck,Aircraft engine
333
+ 331,/m/04229,Jet engine
334
+ 332,/m/02l6bg,"Propeller, airscrew"
335
+ 333,/m/09ct_,Helicopter
336
+ 334,/m/0cmf2,"Fixed-wing aircraft, airplane"
337
+ 335,/m/0199g,Bicycle
338
+ 336,/m/06_fw,Skateboard
339
+ 337,/m/02mk9,Engine
340
+ 338,/t/dd00065,Light engine (high frequency)
341
+ 339,/m/08j51y,"Dental drill, dentist's drill"
342
+ 340,/m/01yg9g,Lawn mower
343
+ 341,/m/01j4z9,Chainsaw
344
+ 342,/t/dd00066,Medium engine (mid frequency)
345
+ 343,/t/dd00067,Heavy engine (low frequency)
346
+ 344,/m/01h82_,Engine knocking
347
+ 345,/t/dd00130,Engine starting
348
+ 346,/m/07pb8fc,Idling
349
+ 347,/m/07q2z82,"Accelerating, revving, vroom"
350
+ 348,/m/02dgv,Door
351
+ 349,/m/03wwcy,Doorbell
352
+ 350,/m/07r67yg,Ding-dong
353
+ 351,/m/02y_763,Sliding door
354
+ 352,/m/07rjzl8,Slam
355
+ 353,/m/07r4wb8,Knock
356
+ 354,/m/07qcpgn,Tap
357
+ 355,/m/07q6cd_,Squeak
358
+ 356,/m/0642b4,Cupboard open or close
359
+ 357,/m/0fqfqc,Drawer open or close
360
+ 358,/m/04brg2,"Dishes, pots, and pans"
361
+ 359,/m/023pjk,"Cutlery, silverware"
362
+ 360,/m/07pn_8q,Chopping (food)
363
+ 361,/m/0dxrf,Frying (food)
364
+ 362,/m/0fx9l,Microwave oven
365
+ 363,/m/02pjr4,Blender
366
+ 364,/m/02jz0l,"Water tap, faucet"
367
+ 365,/m/0130jx,Sink (filling or washing)
368
+ 366,/m/03dnzn,Bathtub (filling or washing)
369
+ 367,/m/03wvsk,Hair dryer
370
+ 368,/m/01jt3m,Toilet flush
371
+ 369,/m/012xff,Toothbrush
372
+ 370,/m/04fgwm,Electric toothbrush
373
+ 371,/m/0d31p,Vacuum cleaner
374
+ 372,/m/01s0vc,Zipper (clothing)
375
+ 373,/m/03v3yw,Keys jangling
376
+ 374,/m/0242l,Coin (dropping)
377
+ 375,/m/01lsmm,Scissors
378
+ 376,/m/02g901,"Electric shaver, electric razor"
379
+ 377,/m/05rj2,Shuffling cards
380
+ 378,/m/0316dw,Typing
381
+ 379,/m/0c2wf,Typewriter
382
+ 380,/m/01m2v,Computer keyboard
383
+ 381,/m/081rb,Writing
384
+ 382,/m/07pp_mv,Alarm
385
+ 383,/m/07cx4,Telephone
386
+ 384,/m/07pp8cl,Telephone bell ringing
387
+ 385,/m/01hnzm,Ringtone
388
+ 386,/m/02c8p,"Telephone dialing, DTMF"
389
+ 387,/m/015jpf,Dial tone
390
+ 388,/m/01z47d,Busy signal
391
+ 389,/m/046dlr,Alarm clock
392
+ 390,/m/03kmc9,Siren
393
+ 391,/m/0dgbq,Civil defense siren
394
+ 392,/m/030rvx,Buzzer
395
+ 393,/m/01y3hg,"Smoke detector, smoke alarm"
396
+ 394,/m/0c3f7m,Fire alarm
397
+ 395,/m/04fq5q,Foghorn
398
+ 396,/m/0l156k,Whistle
399
+ 397,/m/06hck5,Steam whistle
400
+ 398,/t/dd00077,Mechanisms
401
+ 399,/m/02bm9n,"Ratchet, pawl"
402
+ 400,/m/01x3z,Clock
403
+ 401,/m/07qjznt,Tick
404
+ 402,/m/07qjznl,Tick-tock
405
+ 403,/m/0l7xg,Gears
406
+ 404,/m/05zc1,Pulleys
407
+ 405,/m/0llzx,Sewing machine
408
+ 406,/m/02x984l,Mechanical fan
409
+ 407,/m/025wky1,Air conditioning
410
+ 408,/m/024dl,Cash register
411
+ 409,/m/01m4t,Printer
412
+ 410,/m/0dv5r,Camera
413
+ 411,/m/07bjf,Single-lens reflex camera
414
+ 412,/m/07k1x,Tools
415
+ 413,/m/03l9g,Hammer
416
+ 414,/m/03p19w,Jackhammer
417
+ 415,/m/01b82r,Sawing
418
+ 416,/m/02p01q,Filing (rasp)
419
+ 417,/m/023vsd,Sanding
420
+ 418,/m/0_ksk,Power tool
421
+ 419,/m/01d380,Drill
422
+ 420,/m/014zdl,Explosion
423
+ 421,/m/032s66,"Gunshot, gunfire"
424
+ 422,/m/04zjc,Machine gun
425
+ 423,/m/02z32qm,Fusillade
426
+ 424,/m/0_1c,Artillery fire
427
+ 425,/m/073cg4,Cap gun
428
+ 426,/m/0g6b5,Fireworks
429
+ 427,/g/122z_qxw,Firecracker
430
+ 428,/m/07qsvvw,"Burst, pop"
431
+ 429,/m/07pxg6y,Eruption
432
+ 430,/m/07qqyl4,Boom
433
+ 431,/m/083vt,Wood
434
+ 432,/m/07pczhz,Chop
435
+ 433,/m/07pl1bw,Splinter
436
+ 434,/m/07qs1cx,Crack
437
+ 435,/m/039jq,Glass
438
+ 436,/m/07q7njn,"Chink, clink"
439
+ 437,/m/07rn7sz,Shatter
440
+ 438,/m/04k94,Liquid
441
+ 439,/m/07rrlb6,"Splash, splatter"
442
+ 440,/m/07p6mqd,Slosh
443
+ 441,/m/07qlwh6,Squish
444
+ 442,/m/07r5v4s,Drip
445
+ 443,/m/07prgkl,Pour
446
+ 444,/m/07pqc89,"Trickle, dribble"
447
+ 445,/t/dd00088,Gush
448
+ 446,/m/07p7b8y,Fill (with liquid)
449
+ 447,/m/07qlf79,Spray
450
+ 448,/m/07ptzwd,Pump (liquid)
451
+ 449,/m/07ptfmf,Stir
452
+ 450,/m/0dv3j,Boiling
453
+ 451,/m/0790c,Sonar
454
+ 452,/m/0dl83,Arrow
455
+ 453,/m/07rqsjt,"Whoosh, swoosh, swish"
456
+ 454,/m/07qnq_y,"Thump, thud"
457
+ 455,/m/07rrh0c,Thunk
458
+ 456,/m/0b_fwt,Electronic tuner
459
+ 457,/m/02rr_,Effects unit
460
+ 458,/m/07m2kt,Chorus effect
461
+ 459,/m/018w8,Basketball bounce
462
+ 460,/m/07pws3f,Bang
463
+ 461,/m/07ryjzk,"Slap, smack"
464
+ 462,/m/07rdhzs,"Whack, thwack"
465
+ 463,/m/07pjjrj,"Smash, crash"
466
+ 464,/m/07pc8lb,Breaking
467
+ 465,/m/07pqn27,Bouncing
468
+ 466,/m/07rbp7_,Whip
469
+ 467,/m/07pyf11,Flap
470
+ 468,/m/07qb_dv,Scratch
471
+ 469,/m/07qv4k0,Scrape
472
+ 470,/m/07pdjhy,Rub
473
+ 471,/m/07s8j8t,Roll
474
+ 472,/m/07plct2,Crushing
475
+ 473,/t/dd00112,"Crumpling, crinkling"
476
+ 474,/m/07qcx4z,Tearing
477
+ 475,/m/02fs_r,"Beep, bleep"
478
+ 476,/m/07qwdck,Ping
479
+ 477,/m/07phxs1,Ding
480
+ 478,/m/07rv4dm,Clang
481
+ 479,/m/07s02z0,Squeal
482
+ 480,/m/07qh7jl,Creak
483
+ 481,/m/07qwyj0,Rustle
484
+ 482,/m/07s34ls,Whir
485
+ 483,/m/07qmpdm,Clatter
486
+ 484,/m/07p9k1k,Sizzle
487
+ 485,/m/07qc9xj,Clicking
488
+ 486,/m/07rwm0c,Clickety-clack
489
+ 487,/m/07phhsh,Rumble
490
+ 488,/m/07qyrcz,Plop
491
+ 489,/m/07qfgpx,"Jingle, tinkle"
492
+ 490,/m/07rcgpl,Hum
493
+ 491,/m/07p78v5,Zing
494
+ 492,/t/dd00121,Boing
495
+ 493,/m/07s12q4,Crunch
496
+ 494,/m/028v0c,Silence
497
+ 495,/m/01v_m0,Sine wave
498
+ 496,/m/0b9m1,Harmonic
499
+ 497,/m/0hdsk,Chirp tone
500
+ 498,/m/0c1dj,Sound effect
501
+ 499,/m/07pt_g0,Pulse
502
+ 500,/t/dd00125,"Inside, small room"
503
+ 501,/t/dd00126,"Inside, large room or hall"
504
+ 502,/t/dd00127,"Inside, public space"
505
+ 503,/t/dd00128,"Outside, urban or manmade"
506
+ 504,/t/dd00129,"Outside, rural or natural"
507
+ 505,/m/01b9nn,Reverberation
508
+ 506,/m/01jnbd,Echo
509
+ 507,/m/096m7z,Noise
510
+ 508,/m/06_y0by,Environmental noise
511
+ 509,/m/07rgkc5,Static
512
+ 510,/m/06xkwv,Mains hum
513
+ 511,/m/0g12c5,Distortion
514
+ 512,/m/08p9q4,Sidetone
515
+ 513,/m/07szfh9,Cacophony
516
+ 514,/m/0chx_,White noise
517
+ 515,/m/0cj0r,Pink noise
518
+ 516,/m/07p_0gm,Throbbing
519
+ 517,/m/01jwx6,Vibration
520
+ 518,/m/07c52,Television
521
+ 519,/m/06bz3,Radio
522
+ 520,/m/07hvw1,Field recording
yamnet_saved_model/fingerprint.pb ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bd5fc9281fd065d54cf089e0fcdee0b36172677c5261d02d51ac57bb16ddb08e
3
+ size 57
yamnet_saved_model/saved_model.pb ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:14827dd28b4400f559dac66563edbb8c648b7aaad9e5ad2214ffd7759b832d2d
3
+ size 2947713
yamnet_saved_model/variables/variables.data-00000-of-00001 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6ac2e57a8feb68ba9fbc93248ed2d7f2ec8940a95de07a5131d1bc39c6ffbe31
3
+ size 15140606
yamnet_saved_model/variables/variables.index ADDED
Binary file (7.4 kB). View file