Spaces:
Sleeping
Sleeping
Image-Processsing
committed on
Commit
•
85b6267
1
Parent(s):
7799bd8
Upload 18 files
Browse files- .gitattributes +1 -0
- Dockerfile +11 -0
- app.py +346 -0
- requirements.txt +134 -0
- utils/ImageAndTextEmbedding/index.py +40 -0
- utils/audioEmbedding/index.py +28 -0
- utils/imageEmbedding/index.py +17 -0
- utils/imageToText/index.py +24 -0
- utils/objectDetection/index.py +12 -0
- utils/sample.py +77 -0
- utils/sentanceEmbedding/index.py +32 -0
- utils/similarityScore.py +41 -0
- utils/videoEmbedding/index.py +53 -0
- word2vec_model.pkl +3 -0
- yamnet_saved_model/assets/yamnet_class_map.csv +522 -0
- yamnet_saved_model/fingerprint.pb +3 -0
- yamnet_saved_model/saved_model.pb +3 -0
- yamnet_saved_model/variables/variables.data-00000-of-00001 +3 -0
- yamnet_saved_model/variables/variables.index +0 -0
.gitattributes
CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
36 |
+
yamnet_saved_model/variables/variables.data-00000-of-00001 filter=lfs diff=lfs merge=lfs -text
|
Dockerfile
ADDED
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
FROM python:3.9
|
2 |
+
|
3 |
+
WORKDIR /code
|
4 |
+
|
5 |
+
COPY ./requirements.txt /code/requirements.txt
|
6 |
+
|
7 |
+
RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt
|
8 |
+
|
9 |
+
COPY . .
|
10 |
+
|
11 |
+
CMD ["gunicorn", "-b", "0.0.0.0:7860", "app:app"]
|
app.py
ADDED
@@ -0,0 +1,346 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import moviepy.editor as mp
|
2 |
+
from flask import Flask, request, jsonify
|
3 |
+
from flask_cors import CORS
|
4 |
+
import requests
|
5 |
+
from io import BytesIO
|
6 |
+
import speech_recognition as sr
|
7 |
+
import io
|
8 |
+
import fitz
|
9 |
+
import numpy as np
|
10 |
+
import cv2
|
11 |
+
from flask_caching import Cache
|
12 |
+
|
13 |
+
from utils.audioEmbedding.index import extract_audio_embeddings
|
14 |
+
from utils.videoEmbedding.index import get_video_embedding
|
15 |
+
from utils.imageToText.index import extract_text
|
16 |
+
from utils.sentanceEmbedding.index import get_text_vector , get_text_discription_vector
|
17 |
+
from utils.imageEmbedding.index import get_image_embedding
|
18 |
+
from utils.similarityScore import get_all_similarities
|
19 |
+
from utils.objectDetection.index import detect_objects
|
20 |
+
|
21 |
+
app = Flask(__name__)
|
22 |
+
cache = Cache(app, config={'CACHE_TYPE': 'simple'}) # You can choose a caching type based on your requirements
|
23 |
+
CORS(app)
|
24 |
+
import moviepy.editor as mp
|
25 |
+
import tempfile
|
26 |
+
|
27 |
+
def get_face_locations(binary_data):
    """Detect frontal faces in an encoded image and return their bounding boxes.

    Args:
        binary_data: Raw bytes of an encoded image, decoded with
            ``cv2.imdecode``.

    Returns:
        A list with one dict per detected face, holding the integer pixel
        coordinates ``top``, ``right``, ``bottom`` and ``left``.

    Raises:
        ValueError: If the bytes cannot be decoded as an image.
    """
    # Decode the raw bytes into a BGR image.
    encoded = np.frombuffer(binary_data, np.uint8)
    image = cv2.imdecode(encoded, cv2.IMREAD_COLOR)
    if image is None:
        # imdecode reports failure by returning None; fail with a clear
        # message instead of an opaque error from cvtColor further down.
        raise ValueError("Could not decode image data")

    # Haar cascade shipped with OpenCV; it operates on grayscale input.
    face_cascade = cv2.CascadeClassifier(cv2.data.haarcascades + 'haarcascade_frontalface_default.xml')
    gray_image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    faces = face_cascade.detectMultiScale(gray_image, scaleFactor=1.1, minNeighbors=5, minSize=(30, 30))

    # Cast to built-in int so the result does not carry numpy scalar types.
    return [
        {"top": int(y), "right": int(x + w), "bottom": int(y + h), "left": int(x)}
        for (x, y, w, h) in faces
    ]
|
49 |
+
|
50 |
+
def seperate_image_text_from_pdf(pdf_url):
    """Download a PDF and split every page into its text and embedded images.

    Args:
        pdf_url: URL the PDF is fetched from with an HTTP GET.

    Returns:
        A list with one dict per page: ``pgno`` (1-based page number),
        ``images`` (list of raw image bytes) and ``text`` (page text).
        The list is empty when the download does not return HTTP 200.
    """
    import os  # kept function-local, as in the original implementation

    pages_info = []

    # A timeout keeps a stalled remote server from hanging the worker forever.
    response = requests.get(pdf_url, timeout=30)
    if response.status_code != 200:
        print("Failed to fetch the PDF from the URL.")
        return pages_info

    # Persist the download so it can be opened by path.
    with tempfile.NamedTemporaryFile(delete=False) as tmp_file:
        tmp_file.write(response.content)
        tmp_file_path = tmp_file.name

    try:
        pdf = fitz.open(tmp_file_path)
        try:
            for page_num in range(len(pdf)):
                page = pdf.load_page(page_num)
                # The first item of each image entry is the xref used to
                # extract the image from the document.
                images_bytes = [
                    pdf.extract_image(img_info[0])["image"]
                    for img_info in page.get_images(full=True)
                ]
                pages_info.append({
                    "pgno": page_num + 1,
                    "images": images_bytes,
                    "text": page.get_text(),
                })
        finally:
            pdf.close()
    finally:
        # Always remove the temporary file, even when parsing fails.
        os.unlink(tmp_file_path)

    return pages_info
|
104 |
+
|
105 |
+
def pdf_image_text_embedding_and_text_embedding(pages_info):
    """Attach image embeddings and OCR text to every page of a parsed PDF.

    Each image is POSTed as raw bytes to the remote image-processing
    endpoint. Images that fail are skipped so one bad image does not
    discard the whole page.

    Args:
        pages_info: Iterable of page dicts with ``text`` and ``images``
            keys, where ``images`` is a list of raw image bytes.

    Returns:
        A list with one dict per page: ``images`` (list of dicts with
        ``image_embedding`` and ``extracted_text``) and ``text``.
        Returns the string ``"Error"`` if processing fails as a whole.
    """
    try:
        page_embeddings = []

        for page in pages_info:
            text = page["text"]
            images = page["images"]
            image_embeddings = []

            for image in images:
                try:
                    response = requests.post('https://imageprocessing-backend.hf.space/extract_image_text_and_embedding_binary_data', data=image)
                    if response.status_code != 200:
                        # Report the status, not the raw image bytes, which
                        # would flood the log.
                        print(f"Failed to process image: HTTP {response.status_code}")
                        continue

                    result = response.json()
                    image_embeddings.append({
                        "image_embedding": result.get("image_embedding"),
                        "extracted_text": result.get("extracted_text"),
                    })
                except Exception as e:
                    # Best effort per image: keep going, but say what failed.
                    print("Error while processing image:", e)

            page_embeddings.append({
                "images": image_embeddings,
                "text": text,
            })

        return page_embeddings
    except Exception as e:
        print("An error occurred:", e)
        return "Error"
|
155 |
+
|
156 |
+
|
157 |
+
def separate_audio_from_video(video_url):
    """Extract the audio track of a video and return it as WAV bytes.

    Args:
        video_url: Location of the video, passed to ``mp.VideoFileClip``.

    Returns:
        The bytes of the extracted ``.wav`` file, or ``None`` if the video
        has no audio track or any step fails (the error is printed).
    """
    import os  # function-local: only needed for temp-file cleanup

    video = None
    temp_audio_filename = None
    try:
        video = mp.VideoFileClip(video_url)
        audio = video.audio
        if audio is None:
            print("An error occurred:", "video has no audio track")
            return None

        # Reserve a temp path; the file is written after the handle closes.
        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_audio_file:
            temp_audio_filename = temp_audio_file.name

        audio.write_audiofile(temp_audio_filename)

        with open(temp_audio_filename, "rb") as f:
            return f.read()

    except Exception as e:
        print("An error occurred:", e)
        return None
    finally:
        # Release the clip and remove the temp file on every path.
        if video is not None:
            try:
                video.close()
            except Exception as e:
                print("An error occurred:", e)
        if temp_audio_filename is not None and os.path.exists(temp_audio_filename):
            os.unlink(temp_audio_filename)
|
180 |
+
|
181 |
+
|
182 |
+
|
183 |
+
|
184 |
+
# NOTE(review): @cache.cached is applied outside @app.route, so Flask registers
# the uncached function and this decorator has no effect on requests. Left
# as-is: enabling it would need a cache key derived from the POST body.
@cache.cached(timeout=300)
@app.route('/get_text_embedding', methods=['POST'])
def get_text_embedding_route():
    """Return the averaged word-vector embedding for the posted text.

    Expects a JSON body ``{"text": "..."}``. Responds with
    ``{"text_embedding": ...}`` and 200, 400 when ``text`` is missing, or
    ``{"error": ...}`` and 500 when embedding fails.
    """
    payload = request.get_json(silent=True)
    text = payload.get("text") if isinstance(payload, dict) else None
    if not isinstance(text, str):
        return jsonify({"error": "JSON body with a string 'text' field is required"}), 400

    try:
        return jsonify({"text_embedding": get_text_vector(text)}), 200
    except Exception as e:
        return jsonify({"error": str(e)}), 500
|
194 |
+
|
195 |
+
|
196 |
+
# NOTE(review): @cache.cached is applied outside @app.route, so Flask registers
# the uncached function and this decorator has no effect on requests.
@cache.cached(timeout=300)
@app.route('/extract_audio_text_and_embedding', methods=['POST'])
def get_audio_embedding_route():
    """Download an audio file and return its embedding and transcription.

    Expects a JSON body ``{"audio_url": "..."}``. Responds with
    ``{"extracted_text": ..., "audio_embedding": ...}`` and 200.
    Transcription is best effort: on failure ``extracted_text`` is ``""``.
    Responds with 400 when ``audio_url`` is missing and 500 on other errors.
    """
    payload = request.get_json(silent=True)
    audio_url = payload.get('audio_url') if isinstance(payload, dict) else None
    if not audio_url:
        return jsonify({"error": "JSON body with 'audio_url' is required"}), 400

    try:
        response = requests.get(audio_url, timeout=30)
        if response.status_code != 200:
            return jsonify({"error": "Failed to download audio"}), 500
        audio_bytes = response.content

        audio_embedding_list = extract_audio_embeddings(audio_bytes)

        extracted_text = ""
        try:
            recognizer = sr.Recognizer()
            with sr.AudioFile(BytesIO(audio_bytes)) as source:
                recorded = recognizer.record(source)
            extracted_text = recognizer.recognize_google(recorded)
        except Exception as e:
            # Speech recognition must not discard the computed embedding.
            print(e)

        return jsonify({"extracted_text": extracted_text, "audio_embedding": audio_embedding_list}), 200
    except Exception as e:
        print(e)
        return jsonify({"error": str(e)}), 500
|
216 |
+
|
217 |
+
# Route to get image embeddings
|
218 |
+
# NOTE(review): @cache.cached is applied outside @app.route, so Flask registers
# the uncached function and this decorator has no effect on requests.
@cache.cached(timeout=300)
@app.route('/extract_image_text_and_embedding', methods=['POST'])
def get_image_embedding_route():
    """Download an image and return its embedding and OCR text.

    Expects a JSON body ``{"imageUrl": "..."}``. Responds with
    ``{"image_embedding": [...], "extracted_text": "..."}`` and 200,
    400 when ``imageUrl`` is missing, or ``{"error": ...}`` and 500.
    """
    payload = request.get_json(silent=True)
    image_url = payload.get("imageUrl") if isinstance(payload, dict) else None
    if not image_url:
        return jsonify({"error": "JSON body with 'imageUrl' is required"}), 400

    try:
        response = requests.get(image_url, timeout=30)
        if response.status_code != 200:
            return jsonify({"error": "Failed to download image"}), 500

        binary_data = response.content
        extracted_text = extract_text(binary_data)
        image_embedding_list = get_image_embedding(binary_data).tolist()
        return jsonify({"image_embedding": image_embedding_list, "extracted_text": extracted_text}), 200
    except Exception as e:
        return jsonify({"error": str(e)}), 500
|
235 |
+
|
236 |
+
# Route to get video embeddings
|
237 |
+
# NOTE(review): @cache.cached is applied outside @app.route, so Flask registers
# the uncached function and this decorator has no effect on requests.
@cache.cached(timeout=300)
@app.route('/extract_video_text_and_embedding', methods=['POST'])
def get_video_embedding_route():
    """Return video and audio embeddings plus an audio transcription.

    Expects a JSON body ``{"videoUrl": "..."}``. Responds with
    ``{"video_embedding": ..., "extracted_audio_text": ...,
    "audio_embedding": ...}`` and 200. Transcription is best effort: on
    failure ``extracted_audio_text`` is ``""``. Responds with 400 when
    ``videoUrl`` is missing and 500 on other errors.
    """
    payload = request.get_json(silent=True)
    video_url = payload.get("videoUrl") if isinstance(payload, dict) else None
    if not video_url:
        return jsonify({"error": "JSON body with 'videoUrl' is required"}), 400

    try:
        audio_bytes = separate_audio_from_video(video_url)
        if audio_bytes is None:
            # The helper prints its own error and returns None on failure.
            return jsonify({"error": "Failed to extract audio from video"}), 500

        audio_embedding_list = extract_audio_embeddings(audio_bytes)

        extracted_text = ""
        try:
            recognizer = sr.Recognizer()
            with sr.AudioFile(io.BytesIO(audio_bytes)) as source:
                recorded = recognizer.record(source)
            extracted_text = recognizer.recognize_google(recorded)
        except Exception as e:
            # Speech recognition must not discard the computed embeddings.
            print(e)

        video_embedding = get_video_embedding(video_url)
        return jsonify({"video_embedding": video_embedding, "extracted_audio_text": extracted_text, "audio_embedding": audio_embedding_list}), 200
    except Exception as e:
        print(e)
        return jsonify({"error": str(e)}), 500
|
261 |
+
|
262 |
+
# NOTE(review): @cache.cached is applied outside @app.route, so Flask registers
# the uncached function and this decorator has no effect on requests.
@cache.cached(timeout=300)
@app.route('/extract_pdf_text_and_embedding', methods=['POST'])
def extract_pdf_text_and_embedding():
    """Return per-page text and image embeddings for a PDF.

    Expects a JSON body ``{"pdfUrl": "..."}``. Responds with
    ``{"content": [...]}`` and 200, 400 when ``pdfUrl`` is missing, or
    ``{"error": ...}`` and 500 when processing fails.
    """
    payload = request.get_json(silent=True)
    pdf_url = payload.get("pdfUrl") if isinstance(payload, dict) else None
    if not pdf_url:
        return jsonify({"error": "JSON body with 'pdfUrl' is required"}), 400

    try:
        pages_info = seperate_image_text_from_pdf(pdf_url)
        content = pdf_image_text_embedding_and_text_embedding(pages_info)
        if content == "Error":
            # The helper signals failure with the sentinel string "Error";
            # do not report that to the client as a successful result.
            return jsonify({"error": "Failed to process PDF"}), 500
        return jsonify({"content": content}), 200
    except Exception as e:
        return jsonify({"error": str(e)}), 500
|
276 |
+
|
277 |
+
# Route to get text description embeddings
|
278 |
+
# NOTE(review): @cache.cached is applied outside @app.route, so Flask registers
# the uncached function and this decorator has no effect on requests.
@cache.cached(timeout=300)
@app.route('/getTextDescriptionEmbedding', methods=['POST'])
def get_text_description_embedding_route():
    """Return the text-description embedding for the posted text.

    Expects a JSON body ``{"text": "..."}``. Responds with
    ``{"text_description_embedding": [...]}`` and 200, 400 when ``text`` is
    missing, or ``{"error": ...}`` and 500 when embedding fails.
    """
    payload = request.get_json(silent=True)
    text = payload.get("text") if isinstance(payload, dict) else None
    if not isinstance(text, str):
        return jsonify({"error": "JSON body with a string 'text' field is required"}), 400

    try:
        text_description_embedding = get_text_discription_vector(text)
        return jsonify({"text_description_embedding": text_description_embedding.tolist()}), 200
    except Exception as e:
        return jsonify({"error": str(e)}), 500
|
288 |
+
|
289 |
+
|
290 |
+
|
291 |
+
# Route to get object detection results
|
292 |
+
# NOTE(review): @cache.cached is applied outside @app.route, so Flask registers
# the uncached function and this decorator has no effect on requests.
@cache.cached(timeout=300)
@app.route('/detectObjects', methods=['POST'])
def detect_objects_route():
    """Download an image and return the object-detection results for it.

    Expects a JSON body ``{"imageUrl": "..."}``. Responds with
    ``{"object_detection_results": ...}`` and 200, 400 when ``imageUrl`` is
    missing, or ``{"error": ...}`` and 500.
    """
    payload = request.get_json(silent=True)
    image_url = payload.get("imageUrl") if isinstance(payload, dict) else None
    if not image_url:
        return jsonify({"error": "JSON body with 'imageUrl' is required"}), 400

    try:
        response = requests.get(image_url, timeout=30)
        if response.status_code != 200:
            return jsonify({"error": "Failed to download image"}), 500

        object_detection_results = detect_objects(response.content)
        return jsonify({"object_detection_results": object_detection_results}), 200
    except Exception as e:
        return jsonify({"error": str(e)}), 500
|
306 |
+
|
307 |
+
# Route to get face locations
|
308 |
+
# NOTE(review): @cache.cached is applied outside @app.route, so Flask registers
# the uncached function and this decorator has no effect on requests.
@cache.cached(timeout=300)
@app.route('/getFaceLocations', methods=['POST'])
def get_face_locations_route():
    """Download an image and return the locations of the faces found in it.

    Expects a JSON body ``{"imageUrl": "..."}``. Responds with
    ``{"face_locations": "<stringified list>"}`` and 200, 400 when
    ``imageUrl`` is missing, or ``{"error": ...}`` and 500.
    """
    payload = request.get_json(silent=True)
    image_url = payload.get("imageUrl") if isinstance(payload, dict) else None
    if not image_url:
        return jsonify({"error": "JSON body with 'imageUrl' is required"}), 400

    try:
        response = requests.get(image_url, timeout=30)
        if response.status_code != 200:
            return jsonify({"error": "Failed to download image"}), 500

        face_locations = get_face_locations(response.content)
        # The list is returned as a string, as before, so existing clients
        # keep receiving the same response shape.
        return jsonify({"face_locations": str(face_locations)}), 200
    except Exception as e:
        print(e)
        return jsonify({"error": str(e)}), 500
|
327 |
+
|
328 |
+
# Route to get similarity score
|
329 |
+
# NOTE(review): @cache.cached is applied outside @app.route, so Flask registers
# the uncached function and this decorator has no effect on requests.
@cache.cached(timeout=300)
@app.route('/getSimilarityScore', methods=['POST'])
def get_similarity_score_route():
    """Return the similarity scores between two posted embeddings.

    Expects a JSON body ``{"embedding1": [...], "embedding2": [...]}``.
    Responds with ``{"similarity_score": ...}`` and 200, 400 when either
    embedding is missing, or ``{"error": ...}`` and 500.
    """
    payload = request.get_json(silent=True)
    if not isinstance(payload, dict):
        payload = {}
    embedding1 = payload.get("embedding1")
    embedding2 = payload.get("embedding2")
    if embedding1 is None or embedding2 is None:
        return jsonify({"error": "JSON body with 'embedding1' and 'embedding2' is required"}), 400

    try:
        # Embeddings arrive as plain JSON lists.
        similarity_score = get_all_similarities(embedding1, embedding2)
        return jsonify({"similarity_score": similarity_score}), 200
    except Exception as e:
        return jsonify({"error": str(e)}), 500
|
341 |
+
|
342 |
+
@app.route('/')
def hello():
    """Serve a fixed greeting from the root URL."""
    greeting = 'Hello, World!'
    return greeting
|
345 |
+
|
346 |
+
if __name__ == "__main__":
    # Start Flask's development server only when this file is run directly.
    # A WSGI server such as gunicorn imports the module to obtain ``app``; an
    # unguarded app.run() would block that import.
    app.run()
|
requirements.txt
ADDED
@@ -0,0 +1,134 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
absl-py==2.1.0
|
2 |
+
aiohttp==3.9.3
|
3 |
+
aiosignal==1.3.1
|
4 |
+
annotated-types==0.6.0
|
5 |
+
anyio==4.3.0
|
6 |
+
asgiref==3.7.2
|
7 |
+
astunparse==1.6.3
|
8 |
+
attrs==23.2.0
|
9 |
+
audioread==3.0.1
|
10 |
+
beautifulsoup4==4.12.3
|
11 |
+
blinker==1.7.0
|
12 |
+
cachelib==0.9.0
|
13 |
+
certifi==2024.2.2
|
14 |
+
cffi==1.16.0
|
15 |
+
charset-normalizer==3.3.2
|
16 |
+
click==8.1.7
|
17 |
+
colorama==0.4.6
|
18 |
+
decorator==4.4.2
|
19 |
+
distro==1.9.0
|
20 |
+
Django==5.0.1
|
21 |
+
django-cors-headers==4.3.1
|
22 |
+
django-restframework==0.0.1
|
23 |
+
djangorestframework==3.14.0
|
24 |
+
dlib==19.24.2
|
25 |
+
et-xmlfile==1.1.0
|
26 |
+
face-recognition==1.3.0
|
27 |
+
face_recognition_models==0.3.0
|
28 |
+
filelock==3.13.3
|
29 |
+
Flask==3.0.2
|
30 |
+
Flask-Caching==2.1.0
|
31 |
+
Flask-Cors==4.0.0
|
32 |
+
flatbuffers==24.3.25
|
33 |
+
frozenlist==1.4.1
|
34 |
+
fsspec==2024.3.1
|
35 |
+
gast==0.5.4
|
36 |
+
gensim==4.3.2
|
37 |
+
google-pasta==0.2.0
|
38 |
+
grpcio==1.62.1
|
39 |
+
h11==0.14.0
|
40 |
+
h5py==3.10.0
|
41 |
+
httpcore==1.0.4
|
42 |
+
httpx==0.27.0
|
43 |
+
huggingface-hub==0.22.2
|
44 |
+
idna==3.6
|
45 |
+
imageio==2.34.0
|
46 |
+
imageio-ffmpeg==0.4.9
|
47 |
+
itsdangerous==2.1.2
|
48 |
+
Jinja2==3.1.3
|
49 |
+
joblib==1.3.2
|
50 |
+
keras==3.1.1
|
51 |
+
lazy_loader==0.3
|
52 |
+
libclang==18.1.1
|
53 |
+
librosa==0.10.1
|
54 |
+
llvmlite==0.42.0
|
55 |
+
Markdown==3.6
|
56 |
+
markdown-it-py==3.0.0
|
57 |
+
MarkupSafe==2.1.5
|
58 |
+
mdurl==0.1.2
|
59 |
+
ml-dtypes==0.3.2
|
60 |
+
moviepy==1.0.3
|
61 |
+
mpmath==1.3.0
|
62 |
+
msgpack==1.0.8
|
63 |
+
multidict==6.0.5
|
64 |
+
namex==0.0.7
|
65 |
+
networkx==3.2.1
|
66 |
+
numba==0.59.1
|
67 |
+
numpy==1.26.3
|
68 |
+
openai==0.28.0
|
69 |
+
opencv-python==4.9.0.80
|
70 |
+
openpyxl==3.1.2
|
71 |
+
opt-einsum==3.3.0
|
72 |
+
optree==0.11.0
|
73 |
+
outcome==1.3.0.post0
|
74 |
+
packaging==24.0
|
75 |
+
pandas==2.2.0
|
76 |
+
pillow==10.3.0
|
77 |
+
platformdirs==4.2.0
|
78 |
+
pooch==1.8.1
|
79 |
+
proglog==0.1.10
|
80 |
+
protobuf==4.25.3
|
81 |
+
pycparser==2.22
|
82 |
+
pydantic==2.6.3
|
83 |
+
pydantic_core==2.16.3
|
84 |
+
pydub==0.25.1
|
85 |
+
Pygments==2.17.2
|
86 |
+
PyMuPDF==1.24.0
|
87 |
+
PyMuPDFb==1.24.0
|
88 |
+
PySocks==1.7.1
|
89 |
+
python-dateutil==2.8.2
|
90 |
+
python-dotenv==1.0.1
|
91 |
+
pytz==2023.4
|
92 |
+
PyYAML==6.0.1
|
93 |
+
regex==2023.12.25
|
94 |
+
requests==2.31.0
|
95 |
+
rich==13.7.1
|
96 |
+
safetensors==0.4.2
|
97 |
+
scikit-learn==1.4.1.post1
|
98 |
+
scipy==1.12.0
|
99 |
+
selenium==4.19.0
|
100 |
+
setuptools==69.2.0
|
101 |
+
six==1.16.0
|
102 |
+
smart-open==7.0.4
|
103 |
+
sniffio==1.3.1
|
104 |
+
sortedcontainers==2.4.0
|
105 |
+
soundfile==0.12.1
|
106 |
+
soupsieve==2.5
|
107 |
+
soxr==0.3.7
|
108 |
+
SpeechRecognition==3.10.1
|
109 |
+
sqlparse==0.4.4
|
110 |
+
sympy==1.12
|
111 |
+
tensorboard==2.16.2
|
112 |
+
tensorboard-data-server==0.7.2
|
113 |
+
tensorflow==2.16.1
|
114 |
+
tensorflow-intel==2.16.1
|
115 |
+
termcolor==2.4.0
|
116 |
+
tf_keras==2.16.0
|
117 |
+
threadpoolctl==3.4.0
|
118 |
+
timm==0.9.16
|
119 |
+
tokenizers==0.15.2
|
120 |
+
torch==2.2.2
|
121 |
+
torchvision==0.17.2
|
122 |
+
tqdm==4.66.2
|
123 |
+
transformers==4.39.2
|
124 |
+
trio==0.25.0
|
125 |
+
trio-websocket==0.11.1
|
126 |
+
typing_extensions==4.10.0
|
127 |
+
tzdata==2023.4
|
128 |
+
urllib3==2.2.1
|
129 |
+
webdriver-manager==4.0.1
|
130 |
+
Werkzeug==3.0.1
|
131 |
+
wheel==0.43.0
|
132 |
+
wrapt==1.16.0
|
133 |
+
wsproto==1.2.0
|
134 |
+
yarl==1.9.4
|
utils/ImageAndTextEmbedding/index.py
ADDED
@@ -0,0 +1,40 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from PIL import Image
|
2 |
+
import io
|
3 |
+
from transformers import AutoTokenizer, CLIPProcessor, CLIPModel
|
4 |
+
import torch
|
5 |
+
|
6 |
+
# Load CLIP model and processor
|
7 |
+
model_name = "openai/clip-vit-base-patch32"
|
8 |
+
loaded_model = CLIPModel.from_pretrained(model_name)
|
9 |
+
loaded_processor = CLIPProcessor.from_pretrained(model_name)
|
10 |
+
|
11 |
+
def getTextEmbedding(text):
    """Embed a piece of text with the loaded CLIP model.

    Args:
        text: The string to embed.

    Returns:
        A numpy array holding the CLIP text features for ``text``.
    """
    # truncation=True clips over-long text to the tokenizer's maximum length
    # instead of letting the model fail on it.
    inputs_text = loaded_processor(text=[text], return_tensors="pt", padding=True, truncation=True)

    # Inference only: no gradients needed.
    with torch.no_grad():
        text_features = loaded_model.get_text_features(
            input_ids=inputs_text.input_ids,
            attention_mask=inputs_text.attention_mask,
        )

    # Drop the batch dimension and hand back a numpy array.
    return text_features.squeeze().numpy()
|
25 |
+
|
26 |
+
def getImageEmbedding(binary_image_data):
    """Embed an encoded image with the loaded CLIP model.

    Args:
        binary_image_data: Raw bytes of an image file readable by PIL.

    Returns:
        A numpy array holding the CLIP image features.
    """
    # Decode the bytes and run the CLIP preprocessing.
    buffer = io.BytesIO(binary_image_data)
    pil_image = Image.open(buffer)
    processed = loaded_processor(images=pil_image, return_tensors="pt", padding=True)

    # Inference only: no gradients needed.
    with torch.no_grad():
        features = loaded_model.get_image_features(pixel_values=processed.pixel_values)

    # Drop the batch dimension and hand back a numpy array.
    return features.squeeze().numpy()
|
40 |
+
|
utils/audioEmbedding/index.py
ADDED
@@ -0,0 +1,28 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import tensorflow as tf
|
2 |
+
import numpy as np
|
3 |
+
import librosa
|
4 |
+
import pickle
|
5 |
+
import io
|
6 |
+
|
7 |
+
# Load the YAMNet model from the SavedModel format
|
8 |
+
yamnet_model = tf.saved_model.load('yamnet_saved_model')
|
9 |
+
|
10 |
+
# Function to extract embeddings from audio file using YAMNet
|
11 |
+
def extract_audio_embeddings(audio_binary):
    """Compute YAMNet embeddings for encoded audio.

    Args:
        audio_binary: Raw bytes of an audio file readable by librosa.

    Returns:
        The YAMNet embeddings converted to nested Python lists.
    """
    # Decode and resample to 16 kHz, the rate YAMNet requires.
    waveform, _ = librosa.load(io.BytesIO(audio_binary), sr=16000)
    waveform_tensor = tf.convert_to_tensor(waveform, dtype=tf.float32)

    # The model returns (scores, embeddings, spectrogram); only the
    # embeddings are used.
    _, embeddings, _ = yamnet_model(waveform_tensor)
    return embeddings.numpy().tolist()
|
20 |
+
|
21 |
+
# Example usage
|
22 |
+
if __name__ == "__main__":
    image_audio_path = "pictures/users/1a.mp3"
    # extract_audio_embeddings wraps its argument in io.BytesIO, so it needs
    # the file's bytes, not its path.
    with open(image_audio_path, "rb") as audio_file:
        image_audio_embeddings = extract_audio_embeddings(audio_file.read())
    print("Embeddings for", image_audio_path)
    print(image_audio_embeddings)
|
28 |
+
print("audio embedding model loaded succesfully")
|
utils/imageEmbedding/index.py
ADDED
@@ -0,0 +1,17 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import pickle
|
2 |
+
from torchvision import transforms
|
3 |
+
from PIL import Image
|
4 |
+
import torch
|
5 |
+
import io
|
6 |
+
from utils.ImageAndTextEmbedding.index import getImageEmbedding
|
7 |
+
|
8 |
+
def get_image_embedding(image_bytes):
    """Return the embedding of an encoded image.

    Thin wrapper that delegates to ``getImageEmbedding``.
    """
    print("comming 1")
    embedding = getImageEmbedding(image_bytes)
    return embedding
|
11 |
+
|
12 |
+
# Example: Load image data from file and get its embedding
|
13 |
+
# image_data = open("pictures/users/2.jpg", "rb").read()
|
14 |
+
# embedding = get_image_embedding(image_data)
|
15 |
+
# print(embedding)
|
16 |
+
|
17 |
+
print("Image embedding model loaded successfully!")
|
utils/imageToText/index.py
ADDED
@@ -0,0 +1,24 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import pickle
|
2 |
+
import re
|
3 |
+
from PIL import Image
|
4 |
+
from transformers import pipeline
|
5 |
+
import io
|
6 |
+
|
7 |
+
def clean_text(text):
    """Strip markup tags and normalise whitespace in ``text``.

    Removes every ``<...>`` tag, trims leading and trailing whitespace, and
    collapses each run of whitespace into a single space.

    Args:
        text: The raw string to clean.

    Returns:
        The cleaned string.
    """
    # Local names must not reuse ``clean_text``; the original shadowed the
    # function's own name inside its body.
    without_tags = re.sub(r'<[^>]+>', '', text)
    trimmed = without_tags.strip()
    return re.sub(r'\s+', ' ', trimmed)
|
12 |
+
|
13 |
+
pipe = pipeline("image-to-text", model="jinhybr/OCR-Donut-CORD")
|
14 |
+
|
15 |
+
def extract_text(binary_image):
    """Run the OCR pipeline on an encoded image and return the cleaned text.

    Args:
        binary_image: Raw bytes of an image file readable by PIL.

    Returns:
        The first generated text of the pipeline, passed through
        ``clean_text``.
    """
    pil_image = Image.open(io.BytesIO(binary_image))
    predictions = pipe(pil_image)
    raw_text = predictions[0]['generated_text']
    return clean_text(raw_text)
|
21 |
+
|
22 |
+
# print(extract_text(open("pictures/users/2.jpg", "rb").read()))
|
23 |
+
|
24 |
+
print("OCR pipeline loaded successfully!")
|
utils/objectDetection/index.py
ADDED
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from transformers import pipeline
|
2 |
+
from PIL import Image
|
3 |
+
from io import BytesIO
|
4 |
+
|
5 |
+
# Load the object detection pipeline
|
6 |
+
object_detection_pipeline = pipeline("object-detection", model="ciasimbaya/ObjectDetection")
|
7 |
+
def detect_objects(image_bytes):
    """Run the object-detection pipeline on an encoded image.

    Args:
        image_bytes: Raw bytes of an image file readable by PIL.

    Returns:
        Whatever the object-detection pipeline returns for the image.
    """
    pil_image = Image.open(BytesIO(image_bytes))
    return object_detection_pipeline(pil_image)
|
11 |
+
|
12 |
+
print("object detection model loaded succesfully")
|
utils/sample.py
ADDED
@@ -0,0 +1,77 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import requests
|
2 |
+
|
3 |
+
# Define the image URL
|
4 |
+
image_url = "https://utfs.io/f/47589c6c-6ce0-4baf-b75d-b1ec5d4d9dda-213j1w.jpg"
|
5 |
+
audio_url = "https://utfs.io/f/b84a84a2-b68f-49c5-8b7c-d76d894f6d3a-c5qjj4.wav"
|
6 |
+
video_url = "https://utfs.io/f/ef6c037f-fa61-471a-8956-562bc2d62531-fzxs1i.mp4"
|
7 |
+
family_url = "https://i.pinimg.com/originals/b2/20/14/b22014ca275e94097386aab222469caf.jpg"
|
8 |
+
|
9 |
+
|
10 |
+
# Define the URLs of the three nodes
|
11 |
+
extract_text_url = "http://127.0.0.1:5000/extractText"
|
12 |
+
extract_audio_text_url = "http://127.0.0.1:5000/extractAudioText"
|
13 |
+
get_image_embedding_url = "http://127.0.0.1:5000/getImageEmbedding"
|
14 |
+
get_text_embedding_url = "http://127.0.0.1:5000/getTextEmbedding"
|
15 |
+
get_text_description_embedding_url = "http://127.0.0.1:5000/getTextDescriptionEmbedding"
|
16 |
+
get_audio_embedding_url = "http://127.0.0.1:5000/getAudioEmbedding"
|
17 |
+
get_audio_extracted_text_url = "http://127.0.0.1:5000/getAudioExtractedText"
|
18 |
+
get_video_embedding_url = "http://127.0.0.1:5000/getVideoEmbedding"
|
19 |
+
get_object_detection_url = "http://127.0.0.1:5000/detectObjects"
|
20 |
+
get_similarity_score_url = "http://127.0.0.1:5000/getSimilarityScore"
|
21 |
+
get_face_locations_url = "http://127.0.0.1:5000/getFaceLocations"
|
22 |
+
|
23 |
+
# Make requests to each node with the image URL
|
24 |
+
# Smoke-test a locally running server and print a summary of each response.
# NOTE(review): the active requests target /extractAudioText and
# /getVideoEmbedding and read a "transcription" key; confirm these match the
# routes and response keys the server actually exposes.
try:
    # Named ``results`` so the builtin ``list`` is not shadowed.
    results = []

    response_text = requests.post(extract_audio_text_url, json={"audio_url": audio_url})
    extracted_text = response_text.json()["transcription"]
    results.append({"length of text": len(extracted_text)})

    # # Request to extract text
    # response_text = requests.post(extract_text_url, json={"imageUrl": image_url})
    # extracted_text = response_text.json().get("extracted_text")
    # results.append({"length of text":len(extracted_text)})

    # # Request to get image embedding
    # response_image_embedding = requests.post(get_image_embedding_url, json={"imageUrl": image_url})
    # image_embedding = response_image_embedding.json().get("image_embedding")
    # results.append({"length of image_embedding":len(image_embedding)})

    # # Request to get text embedding
    # response_text_embedding = requests.post(get_text_embedding_url, json={"text": extracted_text})
    # text_embedding = response_text_embedding.json().get("text_embedding")
    # results.append({"length of text_embedding":len(text_embedding)})

    # # Request to get text description embedding
    # response_text_description_embedding = requests.post(get_text_description_embedding_url, json={"text": "a image of mobile phone"})
    # text_description_embedding = response_text_description_embedding.json().get("text_description_embedding")
    # results.append({"length of text_description_embedding":len(text_description_embedding)})

    # # Request to get audio embedding
    # response_audio_embedding = requests.post(get_audio_embedding_url, json={"audioUrl": audio_url})
    # audio_embedding = response_audio_embedding.json().get("audio_embedding")
    # results.append({"length of audio_embedding":len(audio_embedding)})

    # Request to get video embedding
    response_video_embedding = requests.post(get_video_embedding_url, json={"videoUrl": video_url})
    video_embedding = response_video_embedding.json().get("video_embedding")
    # NOTE(review): despite the label, this stores the embedding itself, not
    # its length; kept as-is.
    results.append({"length of video_embedding": (video_embedding)})

    # # Request to get object detection
    # response_object_detection = requests.post(get_object_detection_url, json={"imageUrl": image_url})
    # object_detection = response_object_detection.json().get("object_detection_results")
    # results.append({"length of object_detection":len(object_detection)})

    # # Request to get similarity score
    # response_similarity_score = requests.post(get_similarity_score_url, json={"embedding1": text_description_embedding, "embedding2": image_embedding})
    # similarity_score = response_similarity_score.json().get("similarity_score")
    # results.append({"similarity_score":similarity_score})

    # # Request to get face locations
    # response_face_locations = requests.post(get_face_locations_url, json={"imageUrl": family_url})
    # face_locations = response_face_locations.json().get("face_locations")
    # results.append({"face_locations":face_locations})
    print(results)
except Exception as e:
    print("Error:", e)
|
utils/sentanceEmbedding/index.py
ADDED
@@ -0,0 +1,32 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import pickle
|
2 |
+
from utils.ImageAndTextEmbedding.index import getTextEmbedding
|
3 |
+
|
4 |
+
with open("word2vec_model.pkl", "rb") as f:
|
5 |
+
textEmbedding_model = pickle.load(f)
|
6 |
+
|
7 |
+
def get_text_vector(example_text):
    """Return the mean word vector of a text as a plain list.

    The text is lower-cased and split on whitespace. Words that are not
    in ``textEmbedding_model`` are ignored.

    Args:
        example_text: The text to embed. ``None`` or an empty string is
            treated as having no known words.

    Returns:
        The averaged vector as a list, or ``None`` when no word of the
        text is in the model.
    """
    if not example_text:
        # Nothing to tokenise; also avoids calling .lower() on None.
        return None

    # Collect the vectors of in-vocabulary words in a single pass.
    vectors = [
        textEmbedding_model[word]
        for word in example_text.lower().split()
        if word in textEmbedding_model
    ]
    if not vectors:
        return None

    text_vector = sum(vectors) / len(vectors)
    return text_vector.tolist()
|
20 |
+
|
21 |
+
def get_text_discription_vector(text):
    """Return the embedding of ``text`` as computed by ``getTextEmbedding``.

    This is a thin wrapper: the argument is forwarded unchanged and the
    helper's result is returned as-is.
    """
    description_embedding = getTextEmbedding(text)
    return description_embedding
|
23 |
+
|
24 |
+
# Example usage:
|
25 |
+
# example_text = "This is an example sentence."
|
26 |
+
# text_vector = get_text_vector(example_text)
|
27 |
+
# if text_vector:
|
28 |
+
# print("Vector representation of the example text:", text_vector)
|
29 |
+
# else:
|
30 |
+
# print("None of the words in the example text are in the vocabulary of the Word2Vec model.")
|
31 |
+
|
32 |
+
print("Text embedding model loaded successfully!")
|
utils/similarityScore.py
ADDED
@@ -0,0 +1,41 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import numpy as np
|
2 |
+
|
3 |
+
def euclidean_similarity(embedding1, embedding2):
    """Map the Euclidean distance between two embeddings to a (0, 1] score.

    The score is ``1 / (1 + distance)``: identical embeddings score 1.0 and
    the score decreases towards 0 as the embeddings move apart.

    Args:
        embedding1: Sequence (or array) of numbers.
        embedding2: Sequence (or array) of numbers with the same shape.

    Returns:
        The similarity score as a floating-point number.

    Raises:
        ValueError: If the two embeddings do not have the same shape.
    """
    vec1 = np.asarray(embedding1, dtype=float)
    vec2 = np.asarray(embedding2, dtype=float)

    # Without this check NumPy would silently broadcast e.g. a length-1
    # embedding against a longer one and return a meaningless distance.
    if vec1.shape != vec2.shape:
        raise ValueError(
            f"Embeddings must have the same shape, got {vec1.shape} and {vec2.shape}"
        )

    euclidean_distance = np.linalg.norm(vec1 - vec2)
    # Convert distance to similarity score
    return 1 / (1 + euclidean_distance)
|
10 |
+
|
11 |
+
def cosine_similarity(embedding1, embedding2):
    """Return the cosine of the angle between two embeddings.

    Args:
        embedding1: Sequence (or array) of numbers.
        embedding2: Sequence (or array) of numbers of the same length.

    Returns:
        ``dot(a, b) / (|a| * |b|)``. When either embedding has zero norm the
        cosine is undefined, and 0.0 is returned instead of NaN.
    """
    vec1 = np.asarray(embedding1, dtype=float)
    vec2 = np.asarray(embedding2, dtype=float)

    norm_product = np.linalg.norm(vec1) * np.linalg.norm(vec2)
    # An all-zero embedding has no direction; dividing would yield NaN and a
    # RuntimeWarning, so report "no similarity" explicitly.
    if norm_product == 0:
        return 0.0

    return np.dot(vec1, vec2) / norm_product
|
17 |
+
|
18 |
+
def jaccard_similarity(embedding1, embedding2):
    """Return the Jaccard index of the distinct values in two embeddings.

    Both inputs are reduced to sets of their elements, so order and
    repetition are ignored.

    Args:
        embedding1: Iterable of hashable values.
        embedding2: Iterable of hashable values.

    Returns:
        ``|A & B| / |A | B|`` as a float. Two empty inputs are treated as
        identical and score 1.0 (previously this raised ZeroDivisionError).
    """
    values1 = set(embedding1)
    values2 = set(embedding2)

    union = values1 | values2
    if not union:
        # Both inputs are empty: avoid 0 / 0.
        return 1.0

    return len(values1 & values2) / len(union)
|
22 |
+
|
23 |
+
def hamming_similarity(embedding1, embedding2):
    """Return the fraction of positions at which two embeddings are equal.

    Args:
        embedding1: Sequence (or array) of values.
        embedding2: Sequence (or array) of values with the same shape.

    Returns:
        ``1 - (number of differing positions) / (number of positions)`` as a
        float; two empty embeddings score 1.0.

    Raises:
        ValueError: If the two embeddings do not have the same shape.
    """
    # Convert first: for plain Python lists ``a != b`` is a single bool, which
    # would make the "distance" either 0 or 1 regardless of the contents.
    vec1 = np.asarray(embedding1)
    vec2 = np.asarray(embedding2)

    if vec1.shape != vec2.shape:
        raise ValueError(
            f"Embeddings must have the same shape, got {vec1.shape} and {vec2.shape}"
        )
    if vec1.size == 0:
        # Nothing differs between two empty embeddings; avoid 0 / 0.
        return 1.0

    distance = np.count_nonzero(vec1 != vec2)
    return 1 - distance / vec1.size
|
27 |
+
|
28 |
+
def get_all_similarities(embedding1, embedding2):
    """Compute every similarity metric of this module for one embedding pair.

    Args:
        embedding1: First embedding.
        embedding2: Second embedding.

    Returns:
        A dict with the keys ``"euclidean"``, ``"cosine"``, ``"jaccard"`` and
        ``"hamming"``, each mapped to the result of the matching
        ``*_similarity`` function.
    """
    # Evaluated in this order, which is also the key order of the result.
    metrics = (
        ("euclidean", euclidean_similarity),
        ("cosine", cosine_similarity),
        ("jaccard", jaccard_similarity),
        ("hamming", hamming_similarity),
    )
    return {label: metric(embedding1, embedding2) for label, metric in metrics}
|
34 |
+
|
35 |
+
# Example usage:
|
36 |
+
# embedding1 = [1, 2, 3]
|
37 |
+
# embedding2 = [4, 5, 6]
|
38 |
+
# similarities = get_all_similarities(embedding1, embedding2)
|
39 |
+
# print(similarities)
|
40 |
+
|
41 |
+
print("Similarity score is working")
|
utils/videoEmbedding/index.py
ADDED
@@ -0,0 +1,53 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import cv2
|
2 |
+
import numpy as np
|
3 |
+
from utils.imageEmbedding.index import get_image_embedding
|
4 |
+
from utils.imageToText.index import extract_text
|
5 |
+
import requests
|
6 |
+
|
7 |
+
|
8 |
+
def get_video_embedding(video_url, timeout=30):
    """Sample about one frame per second of a video and embed each sample.

    Every sampled frame is JPEG-encoded and POSTed as raw bytes to the image
    processing service; the ``image_embedding`` and ``extracted_text`` fields
    of its JSON reply are collected per frame.

    Args:
        video_url: Path or URL handed to ``cv2.VideoCapture``.
        timeout: Seconds to wait for each HTTP request before giving up.

    Returns:
        A list of ``{"image_embedding": ..., "extracted_text": ...}`` dicts,
        one per successfully processed frame (empty if the video cannot be
        opened), or ``None`` if an unexpected error aborted processing.
    """
    endpoint = 'https://imageprocessing-backend.hf.space/extract_image_text_and_embedding_binary_data'
    cap = None
    try:
        cap = cv2.VideoCapture(video_url)
        fps = cap.get(cv2.CAP_PROP_FPS)
        # Capture a frame every second. The reported FPS can be 0 (or not a
        # finite number) when the backend does not know it; clamp the interval
        # to at least 1 so the modulo below can never divide by zero.
        try:
            interval = max(1, int(fps))
        except (TypeError, ValueError, OverflowError):
            interval = 1

        frame_count = 0
        video_embeddings = []

        while cap.isOpened():
            frame_read, frame = cap.read()
            if not frame_read:
                break

            # Advance the counter for every frame read, including frames that
            # are skipped or fail below; otherwise one failure would cause
            # every following frame to be sampled.
            frame_index = frame_count
            frame_count += 1
            if frame_index % interval != 0:
                continue

            # Convert frame to binary format
            encoded, buffer = cv2.imencode('.jpg', frame)
            if not encoded:
                continue
            frame_bytes = buffer.tobytes()

            # Call the route to get image embedding and extracted text
            response = requests.post(endpoint, data=frame_bytes, timeout=timeout)
            if response.status_code != 200:
                # Log the frame index and status, not the raw JPEG payload.
                print(f"Failed to process frame {frame_index}: HTTP {response.status_code}")
                continue

            result = response.json()
            video_embeddings.append({
                "image_embedding": result.get("image_embedding"),
                "extracted_text": result.get("extracted_text"),
            })

        return video_embeddings

    except Exception as e:
        # Best-effort API: report the problem and signal failure with None.
        print(e)
        return None
    finally:
        # Release the capture on every path, including errors.
        if cap is not None:
            cap.release()
|
48 |
+
|
49 |
+
|
50 |
+
# Example usage:
|
51 |
+
# video_url = "https://utfs.io/f/ef6c037f-fa61-471a-8956-562bc2d62531-fzxs1i.mp4"
|
52 |
+
# video_embeddings = get_video_embedding(video_url)
|
53 |
+
# print("Video Embeddings:", video_embeddings)
|
word2vec_model.pkl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:fa69b6e92ca17e1d8c76c072b75b4c5458f1e5ff1a882962549a3d7141c85e6f
|
3 |
+
size 3704150289
|
yamnet_saved_model/assets/yamnet_class_map.csv
ADDED
@@ -0,0 +1,522 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
index,mid,display_name
|
2 |
+
0,/m/09x0r,Speech
|
3 |
+
1,/m/0ytgt,"Child speech, kid speaking"
|
4 |
+
2,/m/01h8n0,Conversation
|
5 |
+
3,/m/02qldy,"Narration, monologue"
|
6 |
+
4,/m/0261r1,Babbling
|
7 |
+
5,/m/0brhx,Speech synthesizer
|
8 |
+
6,/m/07p6fty,Shout
|
9 |
+
7,/m/07q4ntr,Bellow
|
10 |
+
8,/m/07rwj3x,Whoop
|
11 |
+
9,/m/07sr1lc,Yell
|
12 |
+
10,/t/dd00135,Children shouting
|
13 |
+
11,/m/03qc9zr,Screaming
|
14 |
+
12,/m/02rtxlg,Whispering
|
15 |
+
13,/m/01j3sz,Laughter
|
16 |
+
14,/t/dd00001,Baby laughter
|
17 |
+
15,/m/07r660_,Giggle
|
18 |
+
16,/m/07s04w4,Snicker
|
19 |
+
17,/m/07sq110,Belly laugh
|
20 |
+
18,/m/07rgt08,"Chuckle, chortle"
|
21 |
+
19,/m/0463cq4,"Crying, sobbing"
|
22 |
+
20,/t/dd00002,"Baby cry, infant cry"
|
23 |
+
21,/m/07qz6j3,Whimper
|
24 |
+
22,/m/07qw_06,"Wail, moan"
|
25 |
+
23,/m/07plz5l,Sigh
|
26 |
+
24,/m/015lz1,Singing
|
27 |
+
25,/m/0l14jd,Choir
|
28 |
+
26,/m/01swy6,Yodeling
|
29 |
+
27,/m/02bk07,Chant
|
30 |
+
28,/m/01c194,Mantra
|
31 |
+
29,/t/dd00005,Child singing
|
32 |
+
30,/t/dd00006,Synthetic singing
|
33 |
+
31,/m/06bxc,Rapping
|
34 |
+
32,/m/02fxyj,Humming
|
35 |
+
33,/m/07s2xch,Groan
|
36 |
+
34,/m/07r4k75,Grunt
|
37 |
+
35,/m/01w250,Whistling
|
38 |
+
36,/m/0lyf6,Breathing
|
39 |
+
37,/m/07mzm6,Wheeze
|
40 |
+
38,/m/01d3sd,Snoring
|
41 |
+
39,/m/07s0dtb,Gasp
|
42 |
+
40,/m/07pyy8b,Pant
|
43 |
+
41,/m/07q0yl5,Snort
|
44 |
+
42,/m/01b_21,Cough
|
45 |
+
43,/m/0dl9sf8,Throat clearing
|
46 |
+
44,/m/01hsr_,Sneeze
|
47 |
+
45,/m/07ppn3j,Sniff
|
48 |
+
46,/m/06h7j,Run
|
49 |
+
47,/m/07qv_x_,Shuffle
|
50 |
+
48,/m/07pbtc8,"Walk, footsteps"
|
51 |
+
49,/m/03cczk,"Chewing, mastication"
|
52 |
+
50,/m/07pdhp0,Biting
|
53 |
+
51,/m/0939n_,Gargling
|
54 |
+
52,/m/01g90h,Stomach rumble
|
55 |
+
53,/m/03q5_w,"Burping, eructation"
|
56 |
+
54,/m/02p3nc,Hiccup
|
57 |
+
55,/m/02_nn,Fart
|
58 |
+
56,/m/0k65p,Hands
|
59 |
+
57,/m/025_jnm,Finger snapping
|
60 |
+
58,/m/0l15bq,Clapping
|
61 |
+
59,/m/01jg02,"Heart sounds, heartbeat"
|
62 |
+
60,/m/01jg1z,Heart murmur
|
63 |
+
61,/m/053hz1,Cheering
|
64 |
+
62,/m/028ght,Applause
|
65 |
+
63,/m/07rkbfh,Chatter
|
66 |
+
64,/m/03qtwd,Crowd
|
67 |
+
65,/m/07qfr4h,"Hubbub, speech noise, speech babble"
|
68 |
+
66,/t/dd00013,Children playing
|
69 |
+
67,/m/0jbk,Animal
|
70 |
+
68,/m/068hy,"Domestic animals, pets"
|
71 |
+
69,/m/0bt9lr,Dog
|
72 |
+
70,/m/05tny_,Bark
|
73 |
+
71,/m/07r_k2n,Yip
|
74 |
+
72,/m/07qf0zm,Howl
|
75 |
+
73,/m/07rc7d9,Bow-wow
|
76 |
+
74,/m/0ghcn6,Growling
|
77 |
+
75,/t/dd00136,Whimper (dog)
|
78 |
+
76,/m/01yrx,Cat
|
79 |
+
77,/m/02yds9,Purr
|
80 |
+
78,/m/07qrkrw,Meow
|
81 |
+
79,/m/07rjwbb,Hiss
|
82 |
+
80,/m/07r81j2,Caterwaul
|
83 |
+
81,/m/0ch8v,"Livestock, farm animals, working animals"
|
84 |
+
82,/m/03k3r,Horse
|
85 |
+
83,/m/07rv9rh,Clip-clop
|
86 |
+
84,/m/07q5rw0,"Neigh, whinny"
|
87 |
+
85,/m/01xq0k1,"Cattle, bovinae"
|
88 |
+
86,/m/07rpkh9,Moo
|
89 |
+
87,/m/0239kh,Cowbell
|
90 |
+
88,/m/068zj,Pig
|
91 |
+
89,/t/dd00018,Oink
|
92 |
+
90,/m/03fwl,Goat
|
93 |
+
91,/m/07q0h5t,Bleat
|
94 |
+
92,/m/07bgp,Sheep
|
95 |
+
93,/m/025rv6n,Fowl
|
96 |
+
94,/m/09b5t,"Chicken, rooster"
|
97 |
+
95,/m/07st89h,Cluck
|
98 |
+
96,/m/07qn5dc,"Crowing, cock-a-doodle-doo"
|
99 |
+
97,/m/01rd7k,Turkey
|
100 |
+
98,/m/07svc2k,Gobble
|
101 |
+
99,/m/09ddx,Duck
|
102 |
+
100,/m/07qdb04,Quack
|
103 |
+
101,/m/0dbvp,Goose
|
104 |
+
102,/m/07qwf61,Honk
|
105 |
+
103,/m/01280g,Wild animals
|
106 |
+
104,/m/0cdnk,"Roaring cats (lions, tigers)"
|
107 |
+
105,/m/04cvmfc,Roar
|
108 |
+
106,/m/015p6,Bird
|
109 |
+
107,/m/020bb7,"Bird vocalization, bird call, bird song"
|
110 |
+
108,/m/07pggtn,"Chirp, tweet"
|
111 |
+
109,/m/07sx8x_,Squawk
|
112 |
+
110,/m/0h0rv,"Pigeon, dove"
|
113 |
+
111,/m/07r_25d,Coo
|
114 |
+
112,/m/04s8yn,Crow
|
115 |
+
113,/m/07r5c2p,Caw
|
116 |
+
114,/m/09d5_,Owl
|
117 |
+
115,/m/07r_80w,Hoot
|
118 |
+
116,/m/05_wcq,"Bird flight, flapping wings"
|
119 |
+
117,/m/01z5f,"Canidae, dogs, wolves"
|
120 |
+
118,/m/06hps,"Rodents, rats, mice"
|
121 |
+
119,/m/04rmv,Mouse
|
122 |
+
120,/m/07r4gkf,Patter
|
123 |
+
121,/m/03vt0,Insect
|
124 |
+
122,/m/09xqv,Cricket
|
125 |
+
123,/m/09f96,Mosquito
|
126 |
+
124,/m/0h2mp,"Fly, housefly"
|
127 |
+
125,/m/07pjwq1,Buzz
|
128 |
+
126,/m/01h3n,"Bee, wasp, etc."
|
129 |
+
127,/m/09ld4,Frog
|
130 |
+
128,/m/07st88b,Croak
|
131 |
+
129,/m/078jl,Snake
|
132 |
+
130,/m/07qn4z3,Rattle
|
133 |
+
131,/m/032n05,Whale vocalization
|
134 |
+
132,/m/04rlf,Music
|
135 |
+
133,/m/04szw,Musical instrument
|
136 |
+
134,/m/0fx80y,Plucked string instrument
|
137 |
+
135,/m/0342h,Guitar
|
138 |
+
136,/m/02sgy,Electric guitar
|
139 |
+
137,/m/018vs,Bass guitar
|
140 |
+
138,/m/042v_gx,Acoustic guitar
|
141 |
+
139,/m/06w87,"Steel guitar, slide guitar"
|
142 |
+
140,/m/01glhc,Tapping (guitar technique)
|
143 |
+
141,/m/07s0s5r,Strum
|
144 |
+
142,/m/018j2,Banjo
|
145 |
+
143,/m/0jtg0,Sitar
|
146 |
+
144,/m/04rzd,Mandolin
|
147 |
+
145,/m/01bns_,Zither
|
148 |
+
146,/m/07xzm,Ukulele
|
149 |
+
147,/m/05148p4,Keyboard (musical)
|
150 |
+
148,/m/05r5c,Piano
|
151 |
+
149,/m/01s0ps,Electric piano
|
152 |
+
150,/m/013y1f,Organ
|
153 |
+
151,/m/03xq_f,Electronic organ
|
154 |
+
152,/m/03gvt,Hammond organ
|
155 |
+
153,/m/0l14qv,Synthesizer
|
156 |
+
154,/m/01v1d8,Sampler
|
157 |
+
155,/m/03q5t,Harpsichord
|
158 |
+
156,/m/0l14md,Percussion
|
159 |
+
157,/m/02hnl,Drum kit
|
160 |
+
158,/m/0cfdd,Drum machine
|
161 |
+
159,/m/026t6,Drum
|
162 |
+
160,/m/06rvn,Snare drum
|
163 |
+
161,/m/03t3fj,Rimshot
|
164 |
+
162,/m/02k_mr,Drum roll
|
165 |
+
163,/m/0bm02,Bass drum
|
166 |
+
164,/m/011k_j,Timpani
|
167 |
+
165,/m/01p970,Tabla
|
168 |
+
166,/m/01qbl,Cymbal
|
169 |
+
167,/m/03qtq,Hi-hat
|
170 |
+
168,/m/01sm1g,Wood block
|
171 |
+
169,/m/07brj,Tambourine
|
172 |
+
170,/m/05r5wn,Rattle (instrument)
|
173 |
+
171,/m/0xzly,Maraca
|
174 |
+
172,/m/0mbct,Gong
|
175 |
+
173,/m/016622,Tubular bells
|
176 |
+
174,/m/0j45pbj,Mallet percussion
|
177 |
+
175,/m/0dwsp,"Marimba, xylophone"
|
178 |
+
176,/m/0dwtp,Glockenspiel
|
179 |
+
177,/m/0dwt5,Vibraphone
|
180 |
+
178,/m/0l156b,Steelpan
|
181 |
+
179,/m/05pd6,Orchestra
|
182 |
+
180,/m/01kcd,Brass instrument
|
183 |
+
181,/m/0319l,French horn
|
184 |
+
182,/m/07gql,Trumpet
|
185 |
+
183,/m/07c6l,Trombone
|
186 |
+
184,/m/0l14_3,Bowed string instrument
|
187 |
+
185,/m/02qmj0d,String section
|
188 |
+
186,/m/07y_7,"Violin, fiddle"
|
189 |
+
187,/m/0d8_n,Pizzicato
|
190 |
+
188,/m/01xqw,Cello
|
191 |
+
189,/m/02fsn,Double bass
|
192 |
+
190,/m/085jw,"Wind instrument, woodwind instrument"
|
193 |
+
191,/m/0l14j_,Flute
|
194 |
+
192,/m/06ncr,Saxophone
|
195 |
+
193,/m/01wy6,Clarinet
|
196 |
+
194,/m/03m5k,Harp
|
197 |
+
195,/m/0395lw,Bell
|
198 |
+
196,/m/03w41f,Church bell
|
199 |
+
197,/m/027m70_,Jingle bell
|
200 |
+
198,/m/0gy1t2s,Bicycle bell
|
201 |
+
199,/m/07n_g,Tuning fork
|
202 |
+
200,/m/0f8s22,Chime
|
203 |
+
201,/m/026fgl,Wind chime
|
204 |
+
202,/m/0150b9,Change ringing (campanology)
|
205 |
+
203,/m/03qjg,Harmonica
|
206 |
+
204,/m/0mkg,Accordion
|
207 |
+
205,/m/0192l,Bagpipes
|
208 |
+
206,/m/02bxd,Didgeridoo
|
209 |
+
207,/m/0l14l2,Shofar
|
210 |
+
208,/m/07kc_,Theremin
|
211 |
+
209,/m/0l14t7,Singing bowl
|
212 |
+
210,/m/01hgjl,Scratching (performance technique)
|
213 |
+
211,/m/064t9,Pop music
|
214 |
+
212,/m/0glt670,Hip hop music
|
215 |
+
213,/m/02cz_7,Beatboxing
|
216 |
+
214,/m/06by7,Rock music
|
217 |
+
215,/m/03lty,Heavy metal
|
218 |
+
216,/m/05r6t,Punk rock
|
219 |
+
217,/m/0dls3,Grunge
|
220 |
+
218,/m/0dl5d,Progressive rock
|
221 |
+
219,/m/07sbbz2,Rock and roll
|
222 |
+
220,/m/05w3f,Psychedelic rock
|
223 |
+
221,/m/06j6l,Rhythm and blues
|
224 |
+
222,/m/0gywn,Soul music
|
225 |
+
223,/m/06cqb,Reggae
|
226 |
+
224,/m/01lyv,Country
|
227 |
+
225,/m/015y_n,Swing music
|
228 |
+
226,/m/0gg8l,Bluegrass
|
229 |
+
227,/m/02x8m,Funk
|
230 |
+
228,/m/02w4v,Folk music
|
231 |
+
229,/m/06j64v,Middle Eastern music
|
232 |
+
230,/m/03_d0,Jazz
|
233 |
+
231,/m/026z9,Disco
|
234 |
+
232,/m/0ggq0m,Classical music
|
235 |
+
233,/m/05lls,Opera
|
236 |
+
234,/m/02lkt,Electronic music
|
237 |
+
235,/m/03mb9,House music
|
238 |
+
236,/m/07gxw,Techno
|
239 |
+
237,/m/07s72n,Dubstep
|
240 |
+
238,/m/0283d,Drum and bass
|
241 |
+
239,/m/0m0jc,Electronica
|
242 |
+
240,/m/08cyft,Electronic dance music
|
243 |
+
241,/m/0fd3y,Ambient music
|
244 |
+
242,/m/07lnk,Trance music
|
245 |
+
243,/m/0g293,Music of Latin America
|
246 |
+
244,/m/0ln16,Salsa music
|
247 |
+
245,/m/0326g,Flamenco
|
248 |
+
246,/m/0155w,Blues
|
249 |
+
247,/m/05fw6t,Music for children
|
250 |
+
248,/m/02v2lh,New-age music
|
251 |
+
249,/m/0y4f8,Vocal music
|
252 |
+
250,/m/0z9c,A capella
|
253 |
+
251,/m/0164x2,Music of Africa
|
254 |
+
252,/m/0145m,Afrobeat
|
255 |
+
253,/m/02mscn,Christian music
|
256 |
+
254,/m/016cjb,Gospel music
|
257 |
+
255,/m/028sqc,Music of Asia
|
258 |
+
256,/m/015vgc,Carnatic music
|
259 |
+
257,/m/0dq0md,Music of Bollywood
|
260 |
+
258,/m/06rqw,Ska
|
261 |
+
259,/m/02p0sh1,Traditional music
|
262 |
+
260,/m/05rwpb,Independent music
|
263 |
+
261,/m/074ft,Song
|
264 |
+
262,/m/025td0t,Background music
|
265 |
+
263,/m/02cjck,Theme music
|
266 |
+
264,/m/03r5q_,Jingle (music)
|
267 |
+
265,/m/0l14gg,Soundtrack music
|
268 |
+
266,/m/07pkxdp,Lullaby
|
269 |
+
267,/m/01z7dr,Video game music
|
270 |
+
268,/m/0140xf,Christmas music
|
271 |
+
269,/m/0ggx5q,Dance music
|
272 |
+
270,/m/04wptg,Wedding music
|
273 |
+
271,/t/dd00031,Happy music
|
274 |
+
272,/t/dd00033,Sad music
|
275 |
+
273,/t/dd00034,Tender music
|
276 |
+
274,/t/dd00035,Exciting music
|
277 |
+
275,/t/dd00036,Angry music
|
278 |
+
276,/t/dd00037,Scary music
|
279 |
+
277,/m/03m9d0z,Wind
|
280 |
+
278,/m/09t49,Rustling leaves
|
281 |
+
279,/t/dd00092,Wind noise (microphone)
|
282 |
+
280,/m/0jb2l,Thunderstorm
|
283 |
+
281,/m/0ngt1,Thunder
|
284 |
+
282,/m/0838f,Water
|
285 |
+
283,/m/06mb1,Rain
|
286 |
+
284,/m/07r10fb,Raindrop
|
287 |
+
285,/t/dd00038,Rain on surface
|
288 |
+
286,/m/0j6m2,Stream
|
289 |
+
287,/m/0j2kx,Waterfall
|
290 |
+
288,/m/05kq4,Ocean
|
291 |
+
289,/m/034srq,"Waves, surf"
|
292 |
+
290,/m/06wzb,Steam
|
293 |
+
291,/m/07swgks,Gurgling
|
294 |
+
292,/m/02_41,Fire
|
295 |
+
293,/m/07pzfmf,Crackle
|
296 |
+
294,/m/07yv9,Vehicle
|
297 |
+
295,/m/019jd,"Boat, Water vehicle"
|
298 |
+
296,/m/0hsrw,"Sailboat, sailing ship"
|
299 |
+
297,/m/056ks2,"Rowboat, canoe, kayak"
|
300 |
+
298,/m/02rlv9,"Motorboat, speedboat"
|
301 |
+
299,/m/06q74,Ship
|
302 |
+
300,/m/012f08,Motor vehicle (road)
|
303 |
+
301,/m/0k4j,Car
|
304 |
+
302,/m/0912c9,"Vehicle horn, car horn, honking"
|
305 |
+
303,/m/07qv_d5,Toot
|
306 |
+
304,/m/02mfyn,Car alarm
|
307 |
+
305,/m/04gxbd,"Power windows, electric windows"
|
308 |
+
306,/m/07rknqz,Skidding
|
309 |
+
307,/m/0h9mv,Tire squeal
|
310 |
+
308,/t/dd00134,Car passing by
|
311 |
+
309,/m/0ltv,"Race car, auto racing"
|
312 |
+
310,/m/07r04,Truck
|
313 |
+
311,/m/0gvgw0,Air brake
|
314 |
+
312,/m/05x_td,"Air horn, truck horn"
|
315 |
+
313,/m/02rhddq,Reversing beeps
|
316 |
+
314,/m/03cl9h,"Ice cream truck, ice cream van"
|
317 |
+
315,/m/01bjv,Bus
|
318 |
+
316,/m/03j1ly,Emergency vehicle
|
319 |
+
317,/m/04qvtq,Police car (siren)
|
320 |
+
318,/m/012n7d,Ambulance (siren)
|
321 |
+
319,/m/012ndj,"Fire engine, fire truck (siren)"
|
322 |
+
320,/m/04_sv,Motorcycle
|
323 |
+
321,/m/0btp2,"Traffic noise, roadway noise"
|
324 |
+
322,/m/06d_3,Rail transport
|
325 |
+
323,/m/07jdr,Train
|
326 |
+
324,/m/04zmvq,Train whistle
|
327 |
+
325,/m/0284vy3,Train horn
|
328 |
+
326,/m/01g50p,"Railroad car, train wagon"
|
329 |
+
327,/t/dd00048,Train wheels squealing
|
330 |
+
328,/m/0195fx,"Subway, metro, underground"
|
331 |
+
329,/m/0k5j,Aircraft
|
332 |
+
330,/m/014yck,Aircraft engine
|
333 |
+
331,/m/04229,Jet engine
|
334 |
+
332,/m/02l6bg,"Propeller, airscrew"
|
335 |
+
333,/m/09ct_,Helicopter
|
336 |
+
334,/m/0cmf2,"Fixed-wing aircraft, airplane"
|
337 |
+
335,/m/0199g,Bicycle
|
338 |
+
336,/m/06_fw,Skateboard
|
339 |
+
337,/m/02mk9,Engine
|
340 |
+
338,/t/dd00065,Light engine (high frequency)
|
341 |
+
339,/m/08j51y,"Dental drill, dentist's drill"
|
342 |
+
340,/m/01yg9g,Lawn mower
|
343 |
+
341,/m/01j4z9,Chainsaw
|
344 |
+
342,/t/dd00066,Medium engine (mid frequency)
|
345 |
+
343,/t/dd00067,Heavy engine (low frequency)
|
346 |
+
344,/m/01h82_,Engine knocking
|
347 |
+
345,/t/dd00130,Engine starting
|
348 |
+
346,/m/07pb8fc,Idling
|
349 |
+
347,/m/07q2z82,"Accelerating, revving, vroom"
|
350 |
+
348,/m/02dgv,Door
|
351 |
+
349,/m/03wwcy,Doorbell
|
352 |
+
350,/m/07r67yg,Ding-dong
|
353 |
+
351,/m/02y_763,Sliding door
|
354 |
+
352,/m/07rjzl8,Slam
|
355 |
+
353,/m/07r4wb8,Knock
|
356 |
+
354,/m/07qcpgn,Tap
|
357 |
+
355,/m/07q6cd_,Squeak
|
358 |
+
356,/m/0642b4,Cupboard open or close
|
359 |
+
357,/m/0fqfqc,Drawer open or close
|
360 |
+
358,/m/04brg2,"Dishes, pots, and pans"
|
361 |
+
359,/m/023pjk,"Cutlery, silverware"
|
362 |
+
360,/m/07pn_8q,Chopping (food)
|
363 |
+
361,/m/0dxrf,Frying (food)
|
364 |
+
362,/m/0fx9l,Microwave oven
|
365 |
+
363,/m/02pjr4,Blender
|
366 |
+
364,/m/02jz0l,"Water tap, faucet"
|
367 |
+
365,/m/0130jx,Sink (filling or washing)
|
368 |
+
366,/m/03dnzn,Bathtub (filling or washing)
|
369 |
+
367,/m/03wvsk,Hair dryer
|
370 |
+
368,/m/01jt3m,Toilet flush
|
371 |
+
369,/m/012xff,Toothbrush
|
372 |
+
370,/m/04fgwm,Electric toothbrush
|
373 |
+
371,/m/0d31p,Vacuum cleaner
|
374 |
+
372,/m/01s0vc,Zipper (clothing)
|
375 |
+
373,/m/03v3yw,Keys jangling
|
376 |
+
374,/m/0242l,Coin (dropping)
|
377 |
+
375,/m/01lsmm,Scissors
|
378 |
+
376,/m/02g901,"Electric shaver, electric razor"
|
379 |
+
377,/m/05rj2,Shuffling cards
|
380 |
+
378,/m/0316dw,Typing
|
381 |
+
379,/m/0c2wf,Typewriter
|
382 |
+
380,/m/01m2v,Computer keyboard
|
383 |
+
381,/m/081rb,Writing
|
384 |
+
382,/m/07pp_mv,Alarm
|
385 |
+
383,/m/07cx4,Telephone
|
386 |
+
384,/m/07pp8cl,Telephone bell ringing
|
387 |
+
385,/m/01hnzm,Ringtone
|
388 |
+
386,/m/02c8p,"Telephone dialing, DTMF"
|
389 |
+
387,/m/015jpf,Dial tone
|
390 |
+
388,/m/01z47d,Busy signal
|
391 |
+
389,/m/046dlr,Alarm clock
|
392 |
+
390,/m/03kmc9,Siren
|
393 |
+
391,/m/0dgbq,Civil defense siren
|
394 |
+
392,/m/030rvx,Buzzer
|
395 |
+
393,/m/01y3hg,"Smoke detector, smoke alarm"
|
396 |
+
394,/m/0c3f7m,Fire alarm
|
397 |
+
395,/m/04fq5q,Foghorn
|
398 |
+
396,/m/0l156k,Whistle
|
399 |
+
397,/m/06hck5,Steam whistle
|
400 |
+
398,/t/dd00077,Mechanisms
|
401 |
+
399,/m/02bm9n,"Ratchet, pawl"
|
402 |
+
400,/m/01x3z,Clock
|
403 |
+
401,/m/07qjznt,Tick
|
404 |
+
402,/m/07qjznl,Tick-tock
|
405 |
+
403,/m/0l7xg,Gears
|
406 |
+
404,/m/05zc1,Pulleys
|
407 |
+
405,/m/0llzx,Sewing machine
|
408 |
+
406,/m/02x984l,Mechanical fan
|
409 |
+
407,/m/025wky1,Air conditioning
|
410 |
+
408,/m/024dl,Cash register
|
411 |
+
409,/m/01m4t,Printer
|
412 |
+
410,/m/0dv5r,Camera
|
413 |
+
411,/m/07bjf,Single-lens reflex camera
|
414 |
+
412,/m/07k1x,Tools
|
415 |
+
413,/m/03l9g,Hammer
|
416 |
+
414,/m/03p19w,Jackhammer
|
417 |
+
415,/m/01b82r,Sawing
|
418 |
+
416,/m/02p01q,Filing (rasp)
|
419 |
+
417,/m/023vsd,Sanding
|
420 |
+
418,/m/0_ksk,Power tool
|
421 |
+
419,/m/01d380,Drill
|
422 |
+
420,/m/014zdl,Explosion
|
423 |
+
421,/m/032s66,"Gunshot, gunfire"
|
424 |
+
422,/m/04zjc,Machine gun
|
425 |
+
423,/m/02z32qm,Fusillade
|
426 |
+
424,/m/0_1c,Artillery fire
|
427 |
+
425,/m/073cg4,Cap gun
|
428 |
+
426,/m/0g6b5,Fireworks
|
429 |
+
427,/g/122z_qxw,Firecracker
|
430 |
+
428,/m/07qsvvw,"Burst, pop"
|
431 |
+
429,/m/07pxg6y,Eruption
|
432 |
+
430,/m/07qqyl4,Boom
|
433 |
+
431,/m/083vt,Wood
|
434 |
+
432,/m/07pczhz,Chop
|
435 |
+
433,/m/07pl1bw,Splinter
|
436 |
+
434,/m/07qs1cx,Crack
|
437 |
+
435,/m/039jq,Glass
|
438 |
+
436,/m/07q7njn,"Chink, clink"
|
439 |
+
437,/m/07rn7sz,Shatter
|
440 |
+
438,/m/04k94,Liquid
|
441 |
+
439,/m/07rrlb6,"Splash, splatter"
|
442 |
+
440,/m/07p6mqd,Slosh
|
443 |
+
441,/m/07qlwh6,Squish
|
444 |
+
442,/m/07r5v4s,Drip
|
445 |
+
443,/m/07prgkl,Pour
|
446 |
+
444,/m/07pqc89,"Trickle, dribble"
|
447 |
+
445,/t/dd00088,Gush
|
448 |
+
446,/m/07p7b8y,Fill (with liquid)
|
449 |
+
447,/m/07qlf79,Spray
|
450 |
+
448,/m/07ptzwd,Pump (liquid)
|
451 |
+
449,/m/07ptfmf,Stir
|
452 |
+
450,/m/0dv3j,Boiling
|
453 |
+
451,/m/0790c,Sonar
|
454 |
+
452,/m/0dl83,Arrow
|
455 |
+
453,/m/07rqsjt,"Whoosh, swoosh, swish"
|
456 |
+
454,/m/07qnq_y,"Thump, thud"
|
457 |
+
455,/m/07rrh0c,Thunk
|
458 |
+
456,/m/0b_fwt,Electronic tuner
|
459 |
+
457,/m/02rr_,Effects unit
|
460 |
+
458,/m/07m2kt,Chorus effect
|
461 |
+
459,/m/018w8,Basketball bounce
|
462 |
+
460,/m/07pws3f,Bang
|
463 |
+
461,/m/07ryjzk,"Slap, smack"
|
464 |
+
462,/m/07rdhzs,"Whack, thwack"
|
465 |
+
463,/m/07pjjrj,"Smash, crash"
|
466 |
+
464,/m/07pc8lb,Breaking
|
467 |
+
465,/m/07pqn27,Bouncing
|
468 |
+
466,/m/07rbp7_,Whip
|
469 |
+
467,/m/07pyf11,Flap
|
470 |
+
468,/m/07qb_dv,Scratch
|
471 |
+
469,/m/07qv4k0,Scrape
|
472 |
+
470,/m/07pdjhy,Rub
|
473 |
+
471,/m/07s8j8t,Roll
|
474 |
+
472,/m/07plct2,Crushing
|
475 |
+
473,/t/dd00112,"Crumpling, crinkling"
|
476 |
+
474,/m/07qcx4z,Tearing
|
477 |
+
475,/m/02fs_r,"Beep, bleep"
|
478 |
+
476,/m/07qwdck,Ping
|
479 |
+
477,/m/07phxs1,Ding
|
480 |
+
478,/m/07rv4dm,Clang
|
481 |
+
479,/m/07s02z0,Squeal
|
482 |
+
480,/m/07qh7jl,Creak
|
483 |
+
481,/m/07qwyj0,Rustle
|
484 |
+
482,/m/07s34ls,Whir
|
485 |
+
483,/m/07qmpdm,Clatter
|
486 |
+
484,/m/07p9k1k,Sizzle
|
487 |
+
485,/m/07qc9xj,Clicking
|
488 |
+
486,/m/07rwm0c,Clickety-clack
|
489 |
+
487,/m/07phhsh,Rumble
|
490 |
+
488,/m/07qyrcz,Plop
|
491 |
+
489,/m/07qfgpx,"Jingle, tinkle"
|
492 |
+
490,/m/07rcgpl,Hum
|
493 |
+
491,/m/07p78v5,Zing
|
494 |
+
492,/t/dd00121,Boing
|
495 |
+
493,/m/07s12q4,Crunch
|
496 |
+
494,/m/028v0c,Silence
|
497 |
+
495,/m/01v_m0,Sine wave
|
498 |
+
496,/m/0b9m1,Harmonic
|
499 |
+
497,/m/0hdsk,Chirp tone
|
500 |
+
498,/m/0c1dj,Sound effect
|
501 |
+
499,/m/07pt_g0,Pulse
|
502 |
+
500,/t/dd00125,"Inside, small room"
|
503 |
+
501,/t/dd00126,"Inside, large room or hall"
|
504 |
+
502,/t/dd00127,"Inside, public space"
|
505 |
+
503,/t/dd00128,"Outside, urban or manmade"
|
506 |
+
504,/t/dd00129,"Outside, rural or natural"
|
507 |
+
505,/m/01b9nn,Reverberation
|
508 |
+
506,/m/01jnbd,Echo
|
509 |
+
507,/m/096m7z,Noise
|
510 |
+
508,/m/06_y0by,Environmental noise
|
511 |
+
509,/m/07rgkc5,Static
|
512 |
+
510,/m/06xkwv,Mains hum
|
513 |
+
511,/m/0g12c5,Distortion
|
514 |
+
512,/m/08p9q4,Sidetone
|
515 |
+
513,/m/07szfh9,Cacophony
|
516 |
+
514,/m/0chx_,White noise
|
517 |
+
515,/m/0cj0r,Pink noise
|
518 |
+
516,/m/07p_0gm,Throbbing
|
519 |
+
517,/m/01jwx6,Vibration
|
520 |
+
518,/m/07c52,Television
|
521 |
+
519,/m/06bz3,Radio
|
522 |
+
520,/m/07hvw1,Field recording
|
yamnet_saved_model/fingerprint.pb
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:bd5fc9281fd065d54cf089e0fcdee0b36172677c5261d02d51ac57bb16ddb08e
|
3 |
+
size 57
|
yamnet_saved_model/saved_model.pb
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:14827dd28b4400f559dac66563edbb8c648b7aaad9e5ad2214ffd7759b832d2d
|
3 |
+
size 2947713
|
yamnet_saved_model/variables/variables.data-00000-of-00001
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6ac2e57a8feb68ba9fbc93248ed2d7f2ec8940a95de07a5131d1bc39c6ffbe31
|
3 |
+
size 15140606
|
yamnet_saved_model/variables/variables.index
ADDED
Binary file (7.4 kB). View file
|
|