Spaces:

Image-Processsing
/

Backend

Sleeping

App Files Files Community

Image-Processsing committed on Apr 30, 2024

Commit

85b6267

1 Parent(s): 7799bd8

Upload 18 files

Browse files

Files changed (19) hide show

.gitattributes +1 -0
Dockerfile +11 -0
app.py +346 -0
requirements.txt +134 -0
utils/ImageAndTextEmbedding/index.py +40 -0
utils/audioEmbedding/index.py +28 -0
utils/imageEmbedding/index.py +17 -0
utils/imageToText/index.py +24 -0
utils/objectDetection/index.py +12 -0
utils/sample.py +77 -0
utils/sentanceEmbedding/index.py +32 -0
utils/similarityScore.py +41 -0
utils/videoEmbedding/index.py +53 -0
word2vec_model.pkl +3 -0
yamnet_saved_model/assets/yamnet_class_map.csv +522 -0
yamnet_saved_model/fingerprint.pb +3 -0
yamnet_saved_model/saved_model.pb +3 -0
yamnet_saved_model/variables/variables.data-00000-of-00001 +3 -0
yamnet_saved_model/variables/variables.index +0 -0

.gitattributes CHANGED Viewed

@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text

 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+yamnet_saved_model/variables/variables.data-00000-of-00001 filter=lfs diff=lfs merge=lfs -text

Dockerfile ADDED Viewed

	@@ -0,0 +1,11 @@

+FROM python:3.9
+WORKDIR /code
+COPY ./requirements.txt /code/requirements.txt
+RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt
+COPY . .
+CMD ["gunicorn", "-b", "0.0.0.0:7860", "app:app"]

app.py ADDED Viewed

	@@ -0,0 +1,346 @@

+import moviepy.editor as mp
+from flask import Flask, request, jsonify
+from flask_cors import CORS
+import requests
+from io import BytesIO
+import speech_recognition as sr
+import io
+import fitz
+import numpy as np
+import cv2
+from flask_caching import Cache
+from utils.audioEmbedding.index import extract_audio_embeddings
+from utils.videoEmbedding.index import get_video_embedding
+from utils.imageToText.index import extract_text
+from utils.sentanceEmbedding.index import get_text_vector , get_text_discription_vector
+from utils.imageEmbedding.index import get_image_embedding
+from utils.similarityScore import get_all_similarities
+from utils.objectDetection.index import detect_objects
+app = Flask(__name__)
+cache = Cache(app, config={'CACHE_TYPE': 'simple'})  # You can choose a caching type based on your requirements
+CORS(app)
+import moviepy.editor as mp
+import tempfile
+def get_face_locations(binary_data):
+    # Convert binary image data to numpy array
+    print(1)
+    nparr = np.frombuffer(binary_data, np.uint8)
+    image = cv2.imdecode(nparr, cv2.IMREAD_COLOR)
+    # Load the pre-trained face detection model
+    face_cascade = cv2.CascadeClassifier(cv2.data.haarcascades + 'haarcascade_frontalface_default.xml')
+    # Convert the image to grayscale
+    gray_image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
+    # Detect faces in the image
+    faces = face_cascade.detectMultiScale(gray_image, scaleFactor=1.1, minNeighbors=5, minSize=(30, 30))
+    # Extract face locations
+    print(2)
+    face_locations = []
+    for (x, y, w, h) in faces:
+        face_locations.append({"top": y, "right": x + w, "bottom": y + h, "left": x})
+    print(3)
+    return face_locations
+def seperate_image_text_from_pdf(pdf_url):
+    # List to store page information
+    pages_info = []
+    # Fetch the PDF from the URL
+    response = requests.get(pdf_url)
+    if response.status_code == 200:
+        # Create a temporary file to save the PDF data
+        with tempfile.NamedTemporaryFile(delete=False) as tmp_file:
+            tmp_file.write(response.content)
+            tmp_file_path = tmp_file.name
+        # Open the PDF
+        pdf = fitz.open(tmp_file_path)
+        # Iterate through each page
+        for page_num in range(len(pdf)):
+            page = pdf.load_page(page_num)
+            # Extract text
+            text = page.get_text()
+            # Count images
+            image_list = page.get_images(full=True)
+            # Convert images to BytesIO and store in a list
+            images_bytes = []
+            for img_index, img_info in enumerate(image_list):
+                xref = img_info[0]
+                base_image = pdf.extract_image(xref)
+                image_bytes = base_image["image"]
+                images_bytes.append(image_bytes)
+            # Store page information in a dictionary
+            page_info = {
+                "pgno": page_num + 1,
+                "images": images_bytes,
+                "text": text
+            }
+            # Append page information to the list
+            pages_info.append(page_info)
+        # Close the PDF
+        pdf.close()
+        # Clean up the temporary file
+        import os
+        os.unlink(tmp_file_path)
+    else:
+        print("Failed to fetch the PDF from the URL.")
+    return pages_info
+def pdf_image_text_embedding_and_text_embedding(pages_info):
+    try:
+    # List to store page embeddings
+        page_embeddings = []
+        # Iterate through each page
+        for page in pages_info:
+            # Extract text from the page
+            text = page["text"]
+            # Extract images from the page
+            images = page["images"]
+            # List to store image embeddings
+            image_embeddings = []
+            # Iterate through each image
+            for image in images:
+                try:
+                    # Assuming image is a binary data (e.g., bytes)
+                    response = requests.post('https://imageprocessing-backend.hf.space/extract_image_text_and_embedding_binary_data', data=image)
+                    if response.status_code != 200:
+                        print(f"Failed to process image: {image}")
+                        continue
+                    result = response.json()
+                    image_embedding = result.get("image_embedding")
+                    extracted_text = result.get("extracted_text")
+                    # Append the image embedding to the list
+                    image_embeddings.append({"image_embedding": image_embedding, "extracted_text": extracted_text})
+                    print(len(image_embeddings))
+                except Exception as e:
+                    print(f"error")
+            # Get the text embedding
+            # Store the page embeddings in a dictionary
+            page_embedding = {
+                "images": image_embeddings,
+                "text": text,
+            }
+            # Append the page embedding to the list
+            page_embeddings.append(page_embedding)
+        return page_embeddings
+    except Exception as e:
+        print("An error occurred:", e)
+        return "Error"
+def separate_audio_from_video(video_url):
+    try:
+        # Load the video file
+        video = mp.VideoFileClip(video_url)
+        # Extract audio
+        audio = video.audio
+        # Create a temporary file to write the audio data
+        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_audio_file:
+            temp_audio_filename = temp_audio_file.name
+            # Write the audio data to the temporary file
+            audio.write_audiofile(temp_audio_filename)
+            # Read the audio data from the temporary file as bytes
+            with open(temp_audio_filename, "rb") as f:
+                audio_bytes = f.read()
+        return audio_bytes
+    except Exception as e:
+        print("An error occurred:", e)
+@cache.cached(timeout=300)
+@app.route('/get_text_embedding', methods=['POST'])
+def get_text_embedding_route():
+    try:
+        text = request.json.get("text")
+        text_embedding = get_text_vector(text)
+        return jsonify({"text_embedding": text_embedding}), 200
+    except Exception as e:
+        return jsonify({"error": str(e)}), 500
+@cache.cached(timeout=300)
+@app.route('/extract_audio_text_and_embedding', methods=['POST'])
+def get_audio_embedding_route():
+    audio_url = request.json.get('audio_url')
+    print(audio_url)
+    response = requests.get(audio_url)
+    audio_data = response.content
+    audio_embedding = extract_audio_embeddings(audio_data)
+    audio_embedding_list = audio_embedding
+    audio_file = BytesIO(audio_data)
+    r = sr.Recognizer()
+    with sr.AudioFile(audio_file) as source:
+        audio_data = r.record(source)
+    extracted_text = ""
+    try:
+        text = r.recognize_google(audio_data)
+        extracted_text = text
+    except Exception as e:
+        print(e)
+    return jsonify({"extracted_text": extracted_text, "audio_embedding": audio_embedding_list}), 200
+# Route to get image embeddings
+@cache.cached(timeout=300)
+@app.route('/extract_image_text_and_embedding', methods=['POST'])
+def get_image_embedding_route():
+    try:
+        image_url = request.json.get("imageUrl")
+        print(image_url)
+        response = requests.get(image_url)
+        if response.status_code != 200:
+            return jsonify({"error": "Failed to download image"}), 500
+        binary_data = response.content
+        extracted_text = extract_text(binary_data)
+        image_embedding = get_image_embedding(binary_data)
+        image_embedding_list = image_embedding.tolist()
+        return jsonify({"image_embedding": image_embedding_list,"extracted_text":extracted_text}), 200
+    except Exception as e:
+        return jsonify({"error": str(e)}), 500
+# Route to get video embeddings
+@cache.cached(timeout=300)
+@app.route('/extract_video_text_and_embedding', methods=['POST'])
+def get_video_embedding_route():
+    try:
+        video_url = request.json.get("videoUrl")
+        audio_data = separate_audio_from_video(video_url)
+        audio_embedding = extract_audio_embeddings(audio_data)
+        audio_embedding_list = audio_embedding
+        audio_file = io.BytesIO(audio_data)
+        r = sr.Recognizer()
+        with sr.AudioFile(audio_file) as source:
+            audio_data = r.record(source)
+        extracted_text = ""
+        try:
+            text = r.recognize_google(audio_data)
+            extracted_text = text
+        except Exception as e:
+            print(e)
+        video_embedding = get_video_embedding(video_url)
+        return jsonify({"video_embedding": video_embedding,"extracted_audio_text": extracted_text, "audio_embedding": audio_embedding_list}), 200
+    except Exception as e:
+        print(e)
+        return jsonify({"error": str(e)}), 500
+@cache.cached(timeout=300)
+@app.route('/extract_pdf_text_and_embedding', methods=['POST'])
+def extract_pdf_text_and_embedding():
+    try:
+        pdf_url = request.json.get("pdfUrl")
+        print(1)
+        pages_info = seperate_image_text_from_pdf(pdf_url)
+        # print(pages_info)
+        content = pdf_image_text_embedding_and_text_embedding(pages_info)
+        # print(content)
+        return jsonify({"content": content}), 200
+    except Exception as e:
+        return jsonify({"error": str(e)}), 500
+# Route to get text description embeddings
+@cache.cached(timeout=300)
+@app.route('/getTextDescriptionEmbedding', methods=['POST'])
+def get_text_description_embedding_route():
+    try:
+        text = request.json.get("text")
+        text_description_embedding = get_text_discription_vector(text)
+        return jsonify({"text_description_embedding": text_description_embedding.tolist()}), 200
+    except Exception as e:
+        return jsonify({"error": str(e)}), 500
+# Route to get object detection results
+@cache.cached(timeout=300)
+@app.route('/detectObjects', methods=['POST'])
+def detect_objects_route():
+    try:
+        image_url = request.json.get("imageUrl")
+        response = requests.get(image_url)
+        if response.status_code != 200:
+            return jsonify({"error": "Failed to download image"}), 500
+        binary_data = response.content
+        object_detection_results = detect_objects(binary_data)
+        return jsonify({"object_detection_results": object_detection_results}), 200
+    except Exception as e:
+        return jsonify({"error": str(e)}), 500
+# Route to get face locations
+@cache.cached(timeout=300)
+@app.route('/getFaceLocations', methods=['POST'])
+def get_face_locations_route():
+    try:
+        image_url = request.json.get("imageUrl")
+        response = requests.get(image_url)
+        print(11)
+        if response.status_code != 200:
+            return jsonify({"error": "Failed to download image"}), 500
+        print(22)
+        binary_data = response.content
+        face_locations = get_face_locations(binary_data)
+        print(33)
+        print("ok",face_locations)
+        return jsonify({"face_locations": str(face_locations)}), 200
+    except Exception as e:
+        print(e)
+        return jsonify({"error": str(e)}), 500
+# Route to get similarity score
+@cache.cached(timeout=300)
+@app.route('/getSimilarityScore', methods=['POST'])
+def get_similarity_score_route():
+    try:
+        embedding1 = request.json.get("embedding1")
+        embedding2 = request.json.get("embedding2")
+        # Assuming embeddings are provided as lists
+        similarity_score = get_all_similarities(embedding1, embedding2)
+        return jsonify({"similarity_score": similarity_score}), 200
+    except Exception as e:
+        return jsonify({"error": str(e)}), 500
+@app.route('/')
+def hello():
+    return 'Hello, World!'
+app.run()

requirements.txt ADDED Viewed

	@@ -0,0 +1,134 @@

+absl-py==2.1.0
+aiohttp==3.9.3
+aiosignal==1.3.1
+annotated-types==0.6.0
+anyio==4.3.0
+asgiref==3.7.2
+astunparse==1.6.3
+attrs==23.2.0
+audioread==3.0.1
+beautifulsoup4==4.12.3
+blinker==1.7.0
+cachelib==0.9.0
+certifi==2024.2.2
+cffi==1.16.0
+charset-normalizer==3.3.2
+click==8.1.7
+colorama==0.4.6
+decorator==4.4.2
+distro==1.9.0
+Django==5.0.1
+django-cors-headers==4.3.1
+django-restframework==0.0.1
+djangorestframework==3.14.0
+dlib==19.24.2
+et-xmlfile==1.1.0
+face-recognition==1.3.0
+face_recognition_models==0.3.0
+filelock==3.13.3
+Flask==3.0.2
+Flask-Caching==2.1.0
+Flask-Cors==4.0.0
+flatbuffers==24.3.25
+frozenlist==1.4.1
+fsspec==2024.3.1
+gast==0.5.4
+gensim==4.3.2
+google-pasta==0.2.0
+grpcio==1.62.1
+h11==0.14.0
+h5py==3.10.0
+httpcore==1.0.4
+httpx==0.27.0
+huggingface-hub==0.22.2
+idna==3.6
+imageio==2.34.0
+imageio-ffmpeg==0.4.9
+itsdangerous==2.1.2
+Jinja2==3.1.3
+joblib==1.3.2
+keras==3.1.1
+lazy_loader==0.3
+libclang==18.1.1
+librosa==0.10.1
+llvmlite==0.42.0
+Markdown==3.6
+markdown-it-py==3.0.0
+MarkupSafe==2.1.5
+mdurl==0.1.2
+ml-dtypes==0.3.2
+moviepy==1.0.3
+mpmath==1.3.0
+msgpack==1.0.8
+multidict==6.0.5
+namex==0.0.7
+networkx==3.2.1
+numba==0.59.1
+numpy==1.26.3
+openai==0.28.0
+opencv-python==4.9.0.80
+openpyxl==3.1.2
+opt-einsum==3.3.0
+optree==0.11.0
+outcome==1.3.0.post0
+packaging==24.0
+pandas==2.2.0
+pillow==10.3.0
+platformdirs==4.2.0
+pooch==1.8.1
+proglog==0.1.10
+protobuf==4.25.3
+pycparser==2.22
+pydantic==2.6.3
+pydantic_core==2.16.3
+pydub==0.25.1
+Pygments==2.17.2
+PyMuPDF==1.24.0
+PyMuPDFb==1.24.0
+PySocks==1.7.1
+python-dateutil==2.8.2
+python-dotenv==1.0.1
+pytz==2023.4
+PyYAML==6.0.1
+regex==2023.12.25
+requests==2.31.0
+rich==13.7.1
+safetensors==0.4.2
+scikit-learn==1.4.1.post1
+scipy==1.12.0
+selenium==4.19.0
+setuptools==69.2.0
+six==1.16.0
+smart-open==7.0.4
+sniffio==1.3.1
+sortedcontainers==2.4.0
+soundfile==0.12.1
+soupsieve==2.5
+soxr==0.3.7
+SpeechRecognition==3.10.1
+sqlparse==0.4.4
+sympy==1.12
+tensorboard==2.16.2
+tensorboard-data-server==0.7.2
+tensorflow==2.16.1
+tensorflow-intel==2.16.1
+termcolor==2.4.0
+tf_keras==2.16.0
+threadpoolctl==3.4.0
+timm==0.9.16
+tokenizers==0.15.2
+torch==2.2.2
+torchvision==0.17.2
+tqdm==4.66.2
+transformers==4.39.2
+trio==0.25.0
+trio-websocket==0.11.1
+typing_extensions==4.10.0
+tzdata==2023.4
+urllib3==2.2.1
+webdriver-manager==4.0.1
+Werkzeug==3.0.1
+wheel==0.43.0
+wrapt==1.16.0
+wsproto==1.2.0
+yarl==1.9.4

utils/ImageAndTextEmbedding/index.py ADDED Viewed

	@@ -0,0 +1,40 @@

+from PIL import Image
+import io
+from transformers import AutoTokenizer, CLIPProcessor, CLIPModel
+import torch
+# Load CLIP model and processor
+# Both objects are created once at import time and shared by getTextEmbedding
+# and getImageEmbedding below.
+model_name = "openai/clip-vit-base-patch32"
+loaded_model = CLIPModel.from_pretrained(model_name)
+loaded_processor = CLIPProcessor.from_pretrained(model_name)
+def getTextEmbedding(text):
+    # Preprocess the text
+    print("tear")
+    inputs_text = loaded_processor(text=[text], return_tensors="pt", padding=True)
+    print("here")
+    # Forward pass through the model
+    with torch.no_grad():
+        # Get the text features
+        text_features = loaded_model.get_text_features(input_ids=inputs_text.input_ids, attention_mask=inputs_text.attention_mask)
+    print("bear")
+    # Convert tensor to numpy array for better readability
+    text_embedding = text_features.squeeze().numpy()
+    print("done")
+    return text_embedding
+def getImageEmbedding(binary_image_data):
+    # Load and preprocess the image
+    image = Image.open(io.BytesIO(binary_image_data))
+    inputs = loaded_processor(images=image, return_tensors="pt", padding=True)
+    # Forward pass through the model
+    with torch.no_grad():
+        # Get the image features
+        image_features = loaded_model.get_image_features(pixel_values=inputs.pixel_values)
+    # Convert tensor to numpy array for better readability
+    image_embedding = image_features.squeeze().numpy()
+    return image_embedding

utils/audioEmbedding/index.py ADDED Viewed

	@@ -0,0 +1,28 @@

+import tensorflow as tf
+import numpy as np
+import librosa
+import pickle
+import io
+# Load the YAMNet model from the SavedModel format
+# The path is relative, so it resolves against the process working directory.
+yamnet_model = tf.saved_model.load('yamnet_saved_model')
+# Function to extract embeddings from audio file using YAMNet
+def extract_audio_embeddings(audio_binary):
+    # Load audio from binary data using librosa
+    audio, sample_rate = librosa.load(io.BytesIO(audio_binary), sr=16000)  # YAMNet requires a sample rate of 16kHz
+    # Convert audio to float32 tensor
+    audio_tensor = tf.convert_to_tensor(audio, dtype=tf.float32)
+    # Extract embeddings using YAMNet model
+    scores, embeddings, spectrogram = yamnet_model(audio_tensor)
+    embeddings_list = embeddings.numpy().tolist()  # Convert embeddings to a list of lists
+    return embeddings_list
+# Example usage
+if __name__ == "__main__":
+    image_audio_path = "pictures/users/1a.mp3"
+    # Extract embeddings from image audio file
+    image_audio_embeddings = extract_audio_embeddings(image_audio_path)
+    print("Embeddings for", image_audio_path)
+    print(image_audio_embeddings)
+print("audio embedding model loaded succesfully")

utils/imageEmbedding/index.py ADDED Viewed

	@@ -0,0 +1,17 @@

+import pickle
+from torchvision import transforms
+from PIL import Image
+import torch
+import io
+from utils.ImageAndTextEmbedding.index import getImageEmbedding
+def get_image_embedding(image_bytes):
+    print("comming 1")
+    return getImageEmbedding(image_bytes)
+# Example: Load image data from file and get its embedding
+# image_data = open("pictures/users/2.jpg", "rb").read()
+# embedding = get_image_embedding(image_data)
+# print(embedding)
+# Printed at import time, once the imports above have completed.
+print("Image embedding model loaded successfully!")

utils/imageToText/index.py ADDED Viewed

	@@ -0,0 +1,24 @@

+import pickle
+import re
+from PIL import Image
+from transformers import pipeline
+import io
+def clean_text(text):
+    clean_text = re.sub(r'<[^>]+>', '', text)
+    clean_text = clean_text.strip()
+    clean_text = re.sub(r'\s+', ' ', clean_text)
+    return clean_text
+# Image-to-text pipeline, built once at import time and reused by extract_text.
+pipe = pipeline("image-to-text", model="jinhybr/OCR-Donut-CORD")
+def extract_text(binary_image):
+    image = Image.open(io.BytesIO(binary_image))
+    result = pipe(image)
+    text = result[0]['generated_text']
+    cleaned_text = clean_text(text)
+    return cleaned_text
+# print(extract_text(open("pictures/users/2.jpg", "rb").read()))
+# Printed at import time, after the pipeline above has been constructed.
+print("OCR pipeline loaded successfully!")

utils/objectDetection/index.py ADDED Viewed

	@@ -0,0 +1,12 @@

+from transformers import pipeline
+from PIL import Image
+from io import BytesIO
+# Load the object detection pipeline
+# Built once at import time and reused by detect_objects.
+object_detection_pipeline = pipeline("object-detection", model="ciasimbaya/ObjectDetection")
+def detect_objects(image_bytes):
+    image = Image.open(BytesIO(image_bytes))
+    result = object_detection_pipeline(image)
+    return result
+print("object detection model loaded succesfully")

utils/sample.py ADDED Viewed

	@@ -0,0 +1,77 @@

+import requests
+# Define the image URL
+image_url = "https://utfs.io/f/47589c6c-6ce0-4baf-b75d-b1ec5d4d9dda-213j1w.jpg"
+audio_url = "https://utfs.io/f/b84a84a2-b68f-49c5-8b7c-d76d894f6d3a-c5qjj4.wav"
+video_url = "https://utfs.io/f/ef6c037f-fa61-471a-8956-562bc2d62531-fzxs1i.mp4"
+family_url = "https://i.pinimg.com/originals/b2/20/14/b22014ca275e94097386aab222469caf.jpg"
+# Define the URLs of the three nodes
+extract_text_url = "http://127.0.0.1:5000/extractText"
+extract_audio_text_url = "http://127.0.0.1:5000/extractAudioText"
+get_image_embedding_url = "http://127.0.0.1:5000/getImageEmbedding"
+get_text_embedding_url = "http://127.0.0.1:5000/getTextEmbedding"
+get_text_description_embedding_url  = "http://127.0.0.1:5000/getTextDescriptionEmbedding"
+get_audio_embedding_url = "http://127.0.0.1:5000/getAudioEmbedding"
+get_audio_extracted_text_url = "http://127.0.0.1:5000/getAudioExtractedText"
+get_video_embedding_url = "http://127.0.0.1:5000/getVideoEmbedding"
+get_object_detection_url = "http://127.0.0.1:5000/detectObjects"
+get_similarity_score_url = "http://127.0.0.1:5000/getSimilarityScore"
+get_face_locations_url = "http://127.0.0.1:5000/getFaceLocations"
+# Make requests to each node with the image URL
+try:
+    list=[]
+    response_text = requests.post(extract_audio_text_url, json={"audio_url": audio_url})
+    extracted_text = response_text.json()["transcription"]
+    list.append({"length of text":len(extracted_text)})
+    # # Request to extract text
+    # response_text = requests.post(extract_text_url, json={"imageUrl": image_url})
+    # extracted_text = response_text.json().get("extracted_text")
+    # list.append({"length of text":len(extracted_text)})
+    # # Request to get image embedding
+    # response_image_embedding = requests.post(get_image_embedding_url, json={"imageUrl": image_url})
+    # image_embedding = response_image_embedding.json().get("image_embedding")
+    # list.append({"length of image_embedding":len(image_embedding)})
+    # # Request to get text embedding
+    # response_text_embedding = requests.post(get_text_embedding_url, json={"text": extracted_text})
+    # text_embedding = response_text_embedding.json().get("text_embedding")
+    # list.append({"length of text_embedding":len(text_embedding)})
+    # # Request to get text description embedding
+    # response_text_description_embedding = requests.post(get_text_description_embedding_url, json={"text": "a image of mobile phone"})
+    # text_description_embedding = response_text_description_embedding.json().get("text_description_embedding")
+    # list.append({"length of text_description_embedding":len(text_description_embedding)})
+    # # Request to get audio embedding
+    # response_audio_embedding = requests.post(get_audio_embedding_url, json={"audioUrl": audio_url})
+    # audio_embedding = response_audio_embedding.json().get("audio_embedding")
+    # list.append({"length of audio_embedding":len(audio_embedding)})
+    # Request to get video embedding
+    response_video_embedding = requests.post(get_video_embedding_url, json={"videoUrl": video_url})
+    video_embedding = response_video_embedding.json().get("video_embedding")
+    list.append({"length of video_embedding":(video_embedding)})
+    # # Request to get object detection
+    # response_object_detection = requests.post(get_object_detection_url, json={"imageUrl": image_url})
+    # object_detection = response_object_detection.json().get("object_detection_results")
+    # list.append({"length of object_detection":len(object_detection)})
+    # # Request to get similarity score
+    # response_similarity_score = requests.post(get_similarity_score_url, json={"embedding1": text_description_embedding, "embedding2": image_embedding})
+    # similarity_score = response_similarity_score.json().get("similarity_score")
+    # list.append({"similarity_score":similarity_score})
+    # # Request to get face locations
+    # response_face_locations = requests.post(get_face_locations_url, json={"imageUrl": family_url})
+    # face_locations = response_face_locations.json().get("face_locations")
+    # list.append({"face_locations":face_locations})
+    print(list)
+except Exception as e:
+    print("Error:", e)

utils/sentanceEmbedding/index.py ADDED Viewed

	@@ -0,0 +1,32 @@

+import pickle
+from utils.ImageAndTextEmbedding.index import getTextEmbedding
+# Load the pickled text-embedding model once at import time. The path is
+# relative, so it resolves against the process working directory.
+# NOTE(security): pickle.load can execute arbitrary code; only load a file
+# that comes from a trusted source.
+with open("word2vec_model.pkl", "rb") as f:
+    textEmbedding_model = pickle.load(f)
+def get_text_vector(example_text):
+    # Tokenize the text into words
+    words = example_text.lower().split()
+    # Filter out words that are not in the vocabulary of the Word2Vec model
+    words_in_vocab = [word for word in words if word in textEmbedding_model]
+    # Calculate the average vector representation of the words
+    if words_in_vocab:
+        text_vector = sum(textEmbedding_model[word] for word in words_in_vocab) / len(words_in_vocab)
+        return text_vector.tolist()
+    else:
+        return None
+def get_text_discription_vector(text):
+    return getTextEmbedding(text)
+# Example usage:
+# example_text = "This is an example sentence."
+# text_vector = get_text_vector(example_text)
+# if text_vector:
+#     print("Vector representation of the example text:", text_vector)
+# else:
+#     print("None of the words in the example text are in the vocabulary of the Word2Vec model.")
+# Printed at import time, after the pickled model above has been loaded.
+print("Text embedding model loaded successfully!")

utils/similarityScore.py ADDED Viewed

	@@ -0,0 +1,41 @@

+import numpy as np
+def euclidean_similarity(embedding1, embedding2):
+    embedding1 = np.array(embedding1)
+    embedding2 = np.array(embedding2)
+    euclidean_distance = np.linalg.norm(embedding1 - embedding2)
+    # Convert distance to similarity score
+    similarity_score = 1 / (1 + euclidean_distance)  # You can use other transformations as well
+    return similarity_score
+def cosine_similarity(embedding1, embedding2):
+    dot_product = np.dot(embedding1, embedding2)
+    norm1 = np.linalg.norm(embedding1)
+    norm2 = np.linalg.norm(embedding2)
+    cosine_similarity = dot_product / (norm1 * norm2)
+    return cosine_similarity
+def jaccard_similarity(embedding1, embedding2):
+    intersection = len(set(embedding1).intersection(set(embedding2)))
+    union = len(set(embedding1).union(set(embedding2)))
+    return intersection / union
+def hamming_similarity(embedding1, embedding2):
+    distance = np.count_nonzero(embedding1 != embedding2)
+    similarity = 1 - distance / len(embedding1)
+    return similarity
+def get_all_similarities(embedding1, embedding2):
+    euclidean = euclidean_similarity(embedding1, embedding2)
+    cosine = cosine_similarity(embedding1, embedding2)
+    jaccard = jaccard_similarity(embedding1, embedding2)
+    hamming = hamming_similarity(embedding1, embedding2)
+    return {"euclidean": euclidean, "cosine": cosine, "jaccard": jaccard, "hamming": hamming}
+# Example usage:
+# embedding1 = [1, 2, 3]
+# embedding2 = [4, 5, 6]
+# similarities = get_all_similarities(embedding1, embedding2)
+# print(similarities)
+# Printed at import time.
+print("Similarity score is working")

utils/videoEmbedding/index.py ADDED Viewed

	@@ -0,0 +1,53 @@

+import cv2
+import numpy as np
+from utils.imageEmbedding.index import get_image_embedding
+from utils.imageToText.index import extract_text
+import requests
+def get_video_embedding(video_url):
+    try:
+        cap = cv2.VideoCapture(video_url)
+        fps = cap.get(cv2.CAP_PROP_FPS)
+        interval = int(fps)  # Capture a frame every second
+        frame_count = 0
+        video_embeddings = []
+        while(cap.isOpened()):
+            ret, frame = cap.read()
+            if ret:
+                if frame_count % interval == 0:
+                    # Convert frame to binary format
+                    ret, buffer = cv2.imencode('.jpg', frame)
+                    if not ret:
+                        continue
+                    # Convert frame binary data to bytes
+                    frame_bytes = buffer.tobytes()
+                    # Call the route to get image embedding and extracted text
+                    response = requests.post('https://imageprocessing-backend.hf.space/extract_image_text_and_embedding_binary_data', data=frame_bytes)
+                    if response.status_code != 200:
+                        print(f"Failed to process image: {frame_bytes}")
+                        continue
+                    result = response.json()
+                    image_embedding = result.get("image_embedding")
+                    extracted_text = result.get("extracted_text")
+                    video_embeddings.append({"image_embedding": image_embedding, "extracted_text": extracted_text})
+                frame_count += 1
+            else:
+                break
+        cap.release()
+        return video_embeddings
+    except Exception as e:
+        print(e)
+# Example usage:
+# video_url = "https://utfs.io/f/ef6c037f-fa61-471a-8956-562bc2d62531-fzxs1i.mp4"
+# video_embeddings = get_video_embedding(video_url)
+# print("Video Embeddings:", video_embeddings)

word2vec_model.pkl ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:fa69b6e92ca17e1d8c76c072b75b4c5458f1e5ff1a882962549a3d7141c85e6f
+size 3704150289

yamnet_saved_model/assets/yamnet_class_map.csv ADDED Viewed

	@@ -0,0 +1,522 @@

+index,mid,display_name
+0,/m/09x0r,Speech
+1,/m/0ytgt,"Child speech, kid speaking"
+2,/m/01h8n0,Conversation
+3,/m/02qldy,"Narration, monologue"
+4,/m/0261r1,Babbling
+5,/m/0brhx,Speech synthesizer
+6,/m/07p6fty,Shout
+7,/m/07q4ntr,Bellow
+8,/m/07rwj3x,Whoop
+9,/m/07sr1lc,Yell
+10,/t/dd00135,Children shouting
+11,/m/03qc9zr,Screaming
+12,/m/02rtxlg,Whispering
+13,/m/01j3sz,Laughter
+14,/t/dd00001,Baby laughter
+15,/m/07r660_,Giggle
+16,/m/07s04w4,Snicker
+17,/m/07sq110,Belly laugh
+18,/m/07rgt08,"Chuckle, chortle"
+19,/m/0463cq4,"Crying, sobbing"
+20,/t/dd00002,"Baby cry, infant cry"
+21,/m/07qz6j3,Whimper
+22,/m/07qw_06,"Wail, moan"
+23,/m/07plz5l,Sigh
+24,/m/015lz1,Singing
+25,/m/0l14jd,Choir
+26,/m/01swy6,Yodeling
+27,/m/02bk07,Chant
+28,/m/01c194,Mantra
+29,/t/dd00005,Child singing
+30,/t/dd00006,Synthetic singing
+31,/m/06bxc,Rapping
+32,/m/02fxyj,Humming
+33,/m/07s2xch,Groan
+34,/m/07r4k75,Grunt
+35,/m/01w250,Whistling
+36,/m/0lyf6,Breathing
+37,/m/07mzm6,Wheeze
+38,/m/01d3sd,Snoring
+39,/m/07s0dtb,Gasp
+40,/m/07pyy8b,Pant
+41,/m/07q0yl5,Snort
+42,/m/01b_21,Cough
+43,/m/0dl9sf8,Throat clearing
+44,/m/01hsr_,Sneeze
+45,/m/07ppn3j,Sniff
+46,/m/06h7j,Run
+47,/m/07qv_x_,Shuffle
+48,/m/07pbtc8,"Walk, footsteps"
+49,/m/03cczk,"Chewing, mastication"
+50,/m/07pdhp0,Biting
+51,/m/0939n_,Gargling
+52,/m/01g90h,Stomach rumble
+53,/m/03q5_w,"Burping, eructation"
+54,/m/02p3nc,Hiccup
+55,/m/02_nn,Fart
+56,/m/0k65p,Hands
+57,/m/025_jnm,Finger snapping
+58,/m/0l15bq,Clapping
+59,/m/01jg02,"Heart sounds, heartbeat"
+60,/m/01jg1z,Heart murmur
+61,/m/053hz1,Cheering
+62,/m/028ght,Applause
+63,/m/07rkbfh,Chatter
+64,/m/03qtwd,Crowd
+65,/m/07qfr4h,"Hubbub, speech noise, speech babble"
+66,/t/dd00013,Children playing
+67,/m/0jbk,Animal
+68,/m/068hy,"Domestic animals, pets"
+69,/m/0bt9lr,Dog
+70,/m/05tny_,Bark
+71,/m/07r_k2n,Yip
+72,/m/07qf0zm,Howl
+73,/m/07rc7d9,Bow-wow
+74,/m/0ghcn6,Growling
+75,/t/dd00136,Whimper (dog)
+76,/m/01yrx,Cat
+77,/m/02yds9,Purr
+78,/m/07qrkrw,Meow
+79,/m/07rjwbb,Hiss
+80,/m/07r81j2,Caterwaul
+81,/m/0ch8v,"Livestock, farm animals, working animals"
+82,/m/03k3r,Horse
+83,/m/07rv9rh,Clip-clop
+84,/m/07q5rw0,"Neigh, whinny"
+85,/m/01xq0k1,"Cattle, bovinae"
+86,/m/07rpkh9,Moo
+87,/m/0239kh,Cowbell
+88,/m/068zj,Pig
+89,/t/dd00018,Oink
+90,/m/03fwl,Goat
+91,/m/07q0h5t,Bleat
+92,/m/07bgp,Sheep
+93,/m/025rv6n,Fowl
+94,/m/09b5t,"Chicken, rooster"
+95,/m/07st89h,Cluck
+96,/m/07qn5dc,"Crowing, cock-a-doodle-doo"
+97,/m/01rd7k,Turkey
+98,/m/07svc2k,Gobble
+99,/m/09ddx,Duck
+100,/m/07qdb04,Quack
+101,/m/0dbvp,Goose
+102,/m/07qwf61,Honk
+103,/m/01280g,Wild animals
+104,/m/0cdnk,"Roaring cats (lions, tigers)"
+105,/m/04cvmfc,Roar
+106,/m/015p6,Bird
+107,/m/020bb7,"Bird vocalization, bird call, bird song"
+108,/m/07pggtn,"Chirp, tweet"
+109,/m/07sx8x_,Squawk
+110,/m/0h0rv,"Pigeon, dove"
+111,/m/07r_25d,Coo
+112,/m/04s8yn,Crow
+113,/m/07r5c2p,Caw
+114,/m/09d5_,Owl
+115,/m/07r_80w,Hoot
+116,/m/05_wcq,"Bird flight, flapping wings"
+117,/m/01z5f,"Canidae, dogs, wolves"
+118,/m/06hps,"Rodents, rats, mice"
+119,/m/04rmv,Mouse
+120,/m/07r4gkf,Patter
+121,/m/03vt0,Insect
+122,/m/09xqv,Cricket
+123,/m/09f96,Mosquito
+124,/m/0h2mp,"Fly, housefly"
+125,/m/07pjwq1,Buzz
+126,/m/01h3n,"Bee, wasp, etc."
+127,/m/09ld4,Frog
+128,/m/07st88b,Croak
+129,/m/078jl,Snake
+130,/m/07qn4z3,Rattle
+131,/m/032n05,Whale vocalization
+132,/m/04rlf,Music
+133,/m/04szw,Musical instrument
+134,/m/0fx80y,Plucked string instrument
+135,/m/0342h,Guitar
+136,/m/02sgy,Electric guitar
+137,/m/018vs,Bass guitar
+138,/m/042v_gx,Acoustic guitar
+139,/m/06w87,"Steel guitar, slide guitar"
+140,/m/01glhc,Tapping (guitar technique)
+141,/m/07s0s5r,Strum
+142,/m/018j2,Banjo
+143,/m/0jtg0,Sitar
+144,/m/04rzd,Mandolin
+145,/m/01bns_,Zither
+146,/m/07xzm,Ukulele
+147,/m/05148p4,Keyboard (musical)
+148,/m/05r5c,Piano
+149,/m/01s0ps,Electric piano
+150,/m/013y1f,Organ
+151,/m/03xq_f,Electronic organ
+152,/m/03gvt,Hammond organ
+153,/m/0l14qv,Synthesizer
+154,/m/01v1d8,Sampler
+155,/m/03q5t,Harpsichord
+156,/m/0l14md,Percussion
+157,/m/02hnl,Drum kit
+158,/m/0cfdd,Drum machine
+159,/m/026t6,Drum
+160,/m/06rvn,Snare drum
+161,/m/03t3fj,Rimshot
+162,/m/02k_mr,Drum roll
+163,/m/0bm02,Bass drum
+164,/m/011k_j,Timpani
+165,/m/01p970,Tabla
+166,/m/01qbl,Cymbal
+167,/m/03qtq,Hi-hat
+168,/m/01sm1g,Wood block
+169,/m/07brj,Tambourine
+170,/m/05r5wn,Rattle (instrument)
+171,/m/0xzly,Maraca
+172,/m/0mbct,Gong
+173,/m/016622,Tubular bells
+174,/m/0j45pbj,Mallet percussion
+175,/m/0dwsp,"Marimba, xylophone"
+176,/m/0dwtp,Glockenspiel
+177,/m/0dwt5,Vibraphone
+178,/m/0l156b,Steelpan
+179,/m/05pd6,Orchestra
+180,/m/01kcd,Brass instrument
+181,/m/0319l,French horn
+182,/m/07gql,Trumpet
+183,/m/07c6l,Trombone
+184,/m/0l14_3,Bowed string instrument
+185,/m/02qmj0d,String section
+186,/m/07y_7,"Violin, fiddle"
+187,/m/0d8_n,Pizzicato
+188,/m/01xqw,Cello
+189,/m/02fsn,Double bass
+190,/m/085jw,"Wind instrument, woodwind instrument"
+191,/m/0l14j_,Flute
+192,/m/06ncr,Saxophone
+193,/m/01wy6,Clarinet
+194,/m/03m5k,Harp
+195,/m/0395lw,Bell
+196,/m/03w41f,Church bell
+197,/m/027m70_,Jingle bell
+198,/m/0gy1t2s,Bicycle bell
+199,/m/07n_g,Tuning fork
+200,/m/0f8s22,Chime
+201,/m/026fgl,Wind chime
+202,/m/0150b9,Change ringing (campanology)
+203,/m/03qjg,Harmonica
+204,/m/0mkg,Accordion
+205,/m/0192l,Bagpipes
+206,/m/02bxd,Didgeridoo
+207,/m/0l14l2,Shofar
+208,/m/07kc_,Theremin
+209,/m/0l14t7,Singing bowl
+210,/m/01hgjl,Scratching (performance technique)
+211,/m/064t9,Pop music
+212,/m/0glt670,Hip hop music
+213,/m/02cz_7,Beatboxing
+214,/m/06by7,Rock music
+215,/m/03lty,Heavy metal
+216,/m/05r6t,Punk rock
+217,/m/0dls3,Grunge
+218,/m/0dl5d,Progressive rock
+219,/m/07sbbz2,Rock and roll
+220,/m/05w3f,Psychedelic rock
+221,/m/06j6l,Rhythm and blues
+222,/m/0gywn,Soul music
+223,/m/06cqb,Reggae
+224,/m/01lyv,Country
+225,/m/015y_n,Swing music
+226,/m/0gg8l,Bluegrass
+227,/m/02x8m,Funk
+228,/m/02w4v,Folk music
+229,/m/06j64v,Middle Eastern music
+230,/m/03_d0,Jazz
+231,/m/026z9,Disco
+232,/m/0ggq0m,Classical music
+233,/m/05lls,Opera
+234,/m/02lkt,Electronic music
+235,/m/03mb9,House music
+236,/m/07gxw,Techno
+237,/m/07s72n,Dubstep
+238,/m/0283d,Drum and bass
+239,/m/0m0jc,Electronica
+240,/m/08cyft,Electronic dance music
+241,/m/0fd3y,Ambient music
+242,/m/07lnk,Trance music
+243,/m/0g293,Music of Latin America
+244,/m/0ln16,Salsa music
+245,/m/0326g,Flamenco
+246,/m/0155w,Blues
+247,/m/05fw6t,Music for children
+248,/m/02v2lh,New-age music
+249,/m/0y4f8,Vocal music
+250,/m/0z9c,A capella
+251,/m/0164x2,Music of Africa
+252,/m/0145m,Afrobeat
+253,/m/02mscn,Christian music
+254,/m/016cjb,Gospel music
+255,/m/028sqc,Music of Asia
+256,/m/015vgc,Carnatic music
+257,/m/0dq0md,Music of Bollywood
+258,/m/06rqw,Ska
+259,/m/02p0sh1,Traditional music
+260,/m/05rwpb,Independent music
+261,/m/074ft,Song
+262,/m/025td0t,Background music
+263,/m/02cjck,Theme music
+264,/m/03r5q_,Jingle (music)
+265,/m/0l14gg,Soundtrack music
+266,/m/07pkxdp,Lullaby
+267,/m/01z7dr,Video game music
+268,/m/0140xf,Christmas music
+269,/m/0ggx5q,Dance music
+270,/m/04wptg,Wedding music
+271,/t/dd00031,Happy music
+272,/t/dd00033,Sad music
+273,/t/dd00034,Tender music
+274,/t/dd00035,Exciting music
+275,/t/dd00036,Angry music
+276,/t/dd00037,Scary music
+277,/m/03m9d0z,Wind
+278,/m/09t49,Rustling leaves
+279,/t/dd00092,Wind noise (microphone)
+280,/m/0jb2l,Thunderstorm
+281,/m/0ngt1,Thunder
+282,/m/0838f,Water
+283,/m/06mb1,Rain
+284,/m/07r10fb,Raindrop
+285,/t/dd00038,Rain on surface
+286,/m/0j6m2,Stream
+287,/m/0j2kx,Waterfall
+288,/m/05kq4,Ocean
+289,/m/034srq,"Waves, surf"
+290,/m/06wzb,Steam
+291,/m/07swgks,Gurgling
+292,/m/02_41,Fire
+293,/m/07pzfmf,Crackle
+294,/m/07yv9,Vehicle
+295,/m/019jd,"Boat, Water vehicle"
+296,/m/0hsrw,"Sailboat, sailing ship"
+297,/m/056ks2,"Rowboat, canoe, kayak"
+298,/m/02rlv9,"Motorboat, speedboat"
+299,/m/06q74,Ship
+300,/m/012f08,Motor vehicle (road)
+301,/m/0k4j,Car
+302,/m/0912c9,"Vehicle horn, car horn, honking"
+303,/m/07qv_d5,Toot
+304,/m/02mfyn,Car alarm
+305,/m/04gxbd,"Power windows, electric windows"
+306,/m/07rknqz,Skidding
+307,/m/0h9mv,Tire squeal
+308,/t/dd00134,Car passing by
+309,/m/0ltv,"Race car, auto racing"
+310,/m/07r04,Truck
+311,/m/0gvgw0,Air brake
+312,/m/05x_td,"Air horn, truck horn"
+313,/m/02rhddq,Reversing beeps
+314,/m/03cl9h,"Ice cream truck, ice cream van"
+315,/m/01bjv,Bus
+316,/m/03j1ly,Emergency vehicle
+317,/m/04qvtq,Police car (siren)
+318,/m/012n7d,Ambulance (siren)
+319,/m/012ndj,"Fire engine, fire truck (siren)"
+320,/m/04_sv,Motorcycle
+321,/m/0btp2,"Traffic noise, roadway noise"
+322,/m/06d_3,Rail transport
+323,/m/07jdr,Train
+324,/m/04zmvq,Train whistle
+325,/m/0284vy3,Train horn
+326,/m/01g50p,"Railroad car, train wagon"
+327,/t/dd00048,Train wheels squealing
+328,/m/0195fx,"Subway, metro, underground"
+329,/m/0k5j,Aircraft
+330,/m/014yck,Aircraft engine
+331,/m/04229,Jet engine
+332,/m/02l6bg,"Propeller, airscrew"
+333,/m/09ct_,Helicopter
+334,/m/0cmf2,"Fixed-wing aircraft, airplane"
+335,/m/0199g,Bicycle
+336,/m/06_fw,Skateboard
+337,/m/02mk9,Engine
+338,/t/dd00065,Light engine (high frequency)
+339,/m/08j51y,"Dental drill, dentist's drill"
+340,/m/01yg9g,Lawn mower
+341,/m/01j4z9,Chainsaw
+342,/t/dd00066,Medium engine (mid frequency)
+343,/t/dd00067,Heavy engine (low frequency)
+344,/m/01h82_,Engine knocking
+345,/t/dd00130,Engine starting
+346,/m/07pb8fc,Idling
+347,/m/07q2z82,"Accelerating, revving, vroom"
+348,/m/02dgv,Door
+349,/m/03wwcy,Doorbell
+350,/m/07r67yg,Ding-dong
+351,/m/02y_763,Sliding door
+352,/m/07rjzl8,Slam
+353,/m/07r4wb8,Knock
+354,/m/07qcpgn,Tap
+355,/m/07q6cd_,Squeak
+356,/m/0642b4,Cupboard open or close
+357,/m/0fqfqc,Drawer open or close
+358,/m/04brg2,"Dishes, pots, and pans"
+359,/m/023pjk,"Cutlery, silverware"
+360,/m/07pn_8q,Chopping (food)
+361,/m/0dxrf,Frying (food)
+362,/m/0fx9l,Microwave oven
+363,/m/02pjr4,Blender
+364,/m/02jz0l,"Water tap, faucet"
+365,/m/0130jx,Sink (filling or washing)
+366,/m/03dnzn,Bathtub (filling or washing)
+367,/m/03wvsk,Hair dryer
+368,/m/01jt3m,Toilet flush
+369,/m/012xff,Toothbrush
+370,/m/04fgwm,Electric toothbrush
+371,/m/0d31p,Vacuum cleaner
+372,/m/01s0vc,Zipper (clothing)
+373,/m/03v3yw,Keys jangling
+374,/m/0242l,Coin (dropping)
+375,/m/01lsmm,Scissors
+376,/m/02g901,"Electric shaver, electric razor"
+377,/m/05rj2,Shuffling cards
+378,/m/0316dw,Typing
+379,/m/0c2wf,Typewriter
+380,/m/01m2v,Computer keyboard
+381,/m/081rb,Writing
+382,/m/07pp_mv,Alarm
+383,/m/07cx4,Telephone
+384,/m/07pp8cl,Telephone bell ringing
+385,/m/01hnzm,Ringtone
+386,/m/02c8p,"Telephone dialing, DTMF"
+387,/m/015jpf,Dial tone
+388,/m/01z47d,Busy signal
+389,/m/046dlr,Alarm clock
+390,/m/03kmc9,Siren
+391,/m/0dgbq,Civil defense siren
+392,/m/030rvx,Buzzer
+393,/m/01y3hg,"Smoke detector, smoke alarm"
+394,/m/0c3f7m,Fire alarm
+395,/m/04fq5q,Foghorn
+396,/m/0l156k,Whistle
+397,/m/06hck5,Steam whistle
+398,/t/dd00077,Mechanisms
+399,/m/02bm9n,"Ratchet, pawl"
+400,/m/01x3z,Clock
+401,/m/07qjznt,Tick
+402,/m/07qjznl,Tick-tock
+403,/m/0l7xg,Gears
+404,/m/05zc1,Pulleys
+405,/m/0llzx,Sewing machine
+406,/m/02x984l,Mechanical fan
+407,/m/025wky1,Air conditioning
+408,/m/024dl,Cash register
+409,/m/01m4t,Printer
+410,/m/0dv5r,Camera
+411,/m/07bjf,Single-lens reflex camera
+412,/m/07k1x,Tools
+413,/m/03l9g,Hammer
+414,/m/03p19w,Jackhammer
+415,/m/01b82r,Sawing
+416,/m/02p01q,Filing (rasp)
+417,/m/023vsd,Sanding
+418,/m/0_ksk,Power tool
+419,/m/01d380,Drill
+420,/m/014zdl,Explosion
+421,/m/032s66,"Gunshot, gunfire"
+422,/m/04zjc,Machine gun
+423,/m/02z32qm,Fusillade
+424,/m/0_1c,Artillery fire
+425,/m/073cg4,Cap gun
+426,/m/0g6b5,Fireworks
+427,/g/122z_qxw,Firecracker
+428,/m/07qsvvw,"Burst, pop"
+429,/m/07pxg6y,Eruption
+430,/m/07qqyl4,Boom
+431,/m/083vt,Wood
+432,/m/07pczhz,Chop
+433,/m/07pl1bw,Splinter
+434,/m/07qs1cx,Crack
+435,/m/039jq,Glass
+436,/m/07q7njn,"Chink, clink"
+437,/m/07rn7sz,Shatter
+438,/m/04k94,Liquid
+439,/m/07rrlb6,"Splash, splatter"
+440,/m/07p6mqd,Slosh
+441,/m/07qlwh6,Squish
+442,/m/07r5v4s,Drip
+443,/m/07prgkl,Pour
+444,/m/07pqc89,"Trickle, dribble"
+445,/t/dd00088,Gush
+446,/m/07p7b8y,Fill (with liquid)
+447,/m/07qlf79,Spray
+448,/m/07ptzwd,Pump (liquid)
+449,/m/07ptfmf,Stir
+450,/m/0dv3j,Boiling
+451,/m/0790c,Sonar
+452,/m/0dl83,Arrow
+453,/m/07rqsjt,"Whoosh, swoosh, swish"
+454,/m/07qnq_y,"Thump, thud"
+455,/m/07rrh0c,Thunk
+456,/m/0b_fwt,Electronic tuner
+457,/m/02rr_,Effects unit
+458,/m/07m2kt,Chorus effect
+459,/m/018w8,Basketball bounce
+460,/m/07pws3f,Bang
+461,/m/07ryjzk,"Slap, smack"
+462,/m/07rdhzs,"Whack, thwack"
+463,/m/07pjjrj,"Smash, crash"
+464,/m/07pc8lb,Breaking
+465,/m/07pqn27,Bouncing
+466,/m/07rbp7_,Whip
+467,/m/07pyf11,Flap
+468,/m/07qb_dv,Scratch
+469,/m/07qv4k0,Scrape
+470,/m/07pdjhy,Rub
+471,/m/07s8j8t,Roll
+472,/m/07plct2,Crushing
+473,/t/dd00112,"Crumpling, crinkling"
+474,/m/07qcx4z,Tearing
+475,/m/02fs_r,"Beep, bleep"
+476,/m/07qwdck,Ping
+477,/m/07phxs1,Ding
+478,/m/07rv4dm,Clang
+479,/m/07s02z0,Squeal
+480,/m/07qh7jl,Creak
+481,/m/07qwyj0,Rustle
+482,/m/07s34ls,Whir
+483,/m/07qmpdm,Clatter
+484,/m/07p9k1k,Sizzle
+485,/m/07qc9xj,Clicking
+486,/m/07rwm0c,Clickety-clack
+487,/m/07phhsh,Rumble
+488,/m/07qyrcz,Plop
+489,/m/07qfgpx,"Jingle, tinkle"
+490,/m/07rcgpl,Hum
+491,/m/07p78v5,Zing
+492,/t/dd00121,Boing
+493,/m/07s12q4,Crunch
+494,/m/028v0c,Silence
+495,/m/01v_m0,Sine wave
+496,/m/0b9m1,Harmonic
+497,/m/0hdsk,Chirp tone
+498,/m/0c1dj,Sound effect
+499,/m/07pt_g0,Pulse
+500,/t/dd00125,"Inside, small room"
+501,/t/dd00126,"Inside, large room or hall"
+502,/t/dd00127,"Inside, public space"
+503,/t/dd00128,"Outside, urban or manmade"
+504,/t/dd00129,"Outside, rural or natural"
+505,/m/01b9nn,Reverberation
+506,/m/01jnbd,Echo
+507,/m/096m7z,Noise
+508,/m/06_y0by,Environmental noise
+509,/m/07rgkc5,Static
+510,/m/06xkwv,Mains hum
+511,/m/0g12c5,Distortion
+512,/m/08p9q4,Sidetone
+513,/m/07szfh9,Cacophony
+514,/m/0chx_,White noise
+515,/m/0cj0r,Pink noise
+516,/m/07p_0gm,Throbbing
+517,/m/01jwx6,Vibration
+518,/m/07c52,Television
+519,/m/06bz3,Radio
+520,/m/07hvw1,Field recording

yamnet_saved_model/fingerprint.pb ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:bd5fc9281fd065d54cf089e0fcdee0b36172677c5261d02d51ac57bb16ddb08e
+size 57

yamnet_saved_model/saved_model.pb ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:14827dd28b4400f559dac66563edbb8c648b7aaad9e5ad2214ffd7759b832d2d
+size 2947713

yamnet_saved_model/variables/variables.data-00000-of-00001 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:6ac2e57a8feb68ba9fbc93248ed2d7f2ec8940a95de07a5131d1bc39c6ffbe31
+size 15140606

yamnet_saved_model/variables/variables.index ADDED Viewed

Binary file (7.4 kB). View file