Spaces:

fydhfzh
/

classifier-model-testing

Build error

App Files Files Community

fydhfzh committed on Jul 2, 2024

Commit

8cbd12d

1 Parent(s): d702eb1

initial commit

Browse files

Files changed (2) hide show

app.py +127 -0
requirements.txt +202 -0

app.py ADDED Viewed

	@@ -0,0 +1,127 @@

+from transformers import pipeline
+import gradio as gr
+import torch
+import numpy as np
+import librosa
+import matplotlib.pyplot as plt
+import noisereduce
# Hugging Face Hub repository id of the classifier checkpoint served by this app.
model_id = "fydhfzh/hubert-classifier-aug-fold-3"
# Audio-classification pipeline, built once at import time and reused by
# classify_utterances for every segment.
pipe = pipeline(task="audio-classification", model=model_id)
def get_binary_values(num_letters=28, num_harakat=3):
    """Build the 7-character binary label codes, one per (letter, harakat) pair.

    Each code is the letter index written as 5 binary digits followed by the
    harakat index written as 2 binary digits. Both indices start at 1. Codes
    are ordered by letter first, then by harakat.

    Args:
        num_letters: Number of letter indices to enumerate (1..num_letters).
        num_harakat: Number of harakat indices per letter (1..num_harakat).

    Returns:
        A list of ``num_letters * num_harakat`` strings such as ``"0000101"``.

    Raises:
        ValueError: If an index would overflow its fixed-width field, which
            would make the concatenated codes ambiguous.
    """
    if num_letters > 0b11111:
        raise ValueError("num_letters must fit in 5 bits (at most 31)")
    if num_harakat > 0b11:
        raise ValueError("num_harakat must fit in 2 bits (at most 3)")

    binary_values = []
    for letter_index in range(1, num_letters + 1):
        letter_bits = format(letter_index, '05b')
        # A distinct name for the inner index keeps the outer one intact.
        for harakat_index in range(1, num_harakat + 1):
            binary_values.append(letter_bits + format(harakat_index, '02b'))
    return binary_values
# Binary label codes, ordered by letter and then by harakat.
binary_values = get_binary_values()

# One row per letter: the letter with fatha, kasra and damma, in that order.
# The rows must stay in the same order as binary_values.
arabic_letters = [
    "أَ", "إِ", "أُ",
    "بَ", "بِ", "بُ",
    "تَ", "تِ", "تُ",
    "ثَ", "ثِ", "ثُ",
    "جَ", "جِ", "جُ",
    "حَ", "حِ", "حُ",
    "خَ", "خِ", "خُ",
    "دَ", "دِ", "دُ",
    "ذَ", "ذِ", "ذُ",
    "رَ", "رِ", "رُ",
    "زَ", "زِ", "زُ",
    "سَ", "سِ", "سُ",
    "شَ", "شِ", "شُ",
    "صَ", "صِ", "صُ",
    "ضَ", "ضِ", "ضُ",
    "طَ", "طِ", "طُ",
    "ظَ", "ظِ", "ظُ",
    "عَ", "عِ", "عُ",
    "غَ", "غِ", "غُ",
    "فَ", "فِ", "فُ",
    "قَ", "قِ", "قُ",
    "كَ", "كِ", "كُ",
    "لَ", "لِ", "لُ",
    "مَ", "مِ", "مُ",
    "نَ", "نِ", "نُ",
    "هَ", "هِ", "هُ",
    "وَ", "وِ", "وُ",
    "يَ", "يِ", "يُ",
]

# zip() stops at the shorter input, so a length mismatch would silently drop
# entries from the lookup table; fail loudly instead.
if len(binary_values) != len(arabic_letters):
    raise ValueError(
        f"expected {len(binary_values)} Arabic syllables, "
        f"got {len(arabic_letters)}"
    )

# Maps each binary label code to the syllable it stands for.
arabic_representation = dict(zip(binary_values, arabic_letters))
def split_input(raw_input, target_len=16000, overlap=2000,
                percentile_param=10, extra_db_param=0):
    """Split a waveform into non-silent segments of exactly ``target_len`` samples.

    The silence threshold is derived from the signal itself: frame energies
    are converted to dB relative to the quietest frame, the frames that sit
    exactly at that reference level (0 dB) are dropped, and the
    ``percentile_param``-th percentile of the remainder (plus
    ``extra_db_param``) is passed to ``librosa.effects.split`` as ``top_db``.

    Args:
        raw_input: Audio samples. Assumed to be a mono 1-D NumPy array, as
            returned by ``librosa.load`` with its default settings.
        target_len: Length in samples of every returned segment (the default
            of 16000 is one second at a 16 kHz sample rate).
        overlap: Extra samples kept before and after each detected interval.
        percentile_param: Percentile of the frame energies (in dB) used as
            the split threshold.
        extra_db_param: Offset in dB added to that percentile.

    Returns:
        A list of arrays, each ``target_len`` samples long. Shorter segments
        are zero-padded on both sides; longer ones are truncated. The list is
        empty when the input is empty or no usable energy frames remain.
    """
    total_len = len(raw_input)
    if total_len == 0:
        return []

    mse = librosa.feature.rms(y=raw_input, frame_length=2048, hop_length=512) ** 2
    mse_db = librosa.core.power_to_db(mse.squeeze(), ref=np.min, top_db=None)
    mse_db = mse_db[mse_db != 0]
    if mse_db.size == 0:
        # Every frame sits at the reference level (e.g. constant or silent
        # input), so there is nothing to take a percentile of.
        return []

    threshold = np.percentile(mse_db, percentile_param) + extra_db_param
    intervals = librosa.effects.split(y=raw_input, top_db=threshold)

    splitted_input = []
    for start, end in intervals:
        # Widen each interval so the onset and tail of the utterance are kept,
        # clamped to the bounds of the signal.
        start = max(start - overlap, 0)
        end = min(end + overlap, total_len)
        split_audio = raw_input[start:end]

        missing = target_len - len(split_audio)
        if missing > 0:
            # Put the odd sample (if any) on the right so the result is
            # always exactly target_len long.
            left = missing // 2
            right = missing - left
            split_audio = np.pad(split_audio, pad_width=(left, right),
                                 mode='constant', constant_values=(0, 0))
        else:
            split_audio = split_audio[:target_len]
        splitted_input.append(split_audio)
    return splitted_input
def process_audio(filepath):
    """Load an audio file and cut it into segments ready for classification.

    The file is loaded at a 16 kHz sample rate, passed through
    ``noisereduce.reduce_noise``, normalized with ``librosa.util.normalize``
    and finally segmented by ``split_input``.

    Args:
        filepath: Path of the audio file to load.

    Returns:
        The list of segments produced by ``split_input``.
    """
    waveform, sample_rate = librosa.load(filepath, sr=16000)
    denoised = noisereduce.reduce_noise(y=waveform, sr=sample_rate)
    normalized = librosa.util.normalize(denoised)
    return split_input(normalized)
def classify_utterances(filepath):
    """Classify every utterance found in an audio file.

    Args:
        filepath: Path of the audio file, or ``None``/empty when the UI was
            submitted without any audio.

    Returns:
        The predicted syllables joined by single spaces, in the order the
        segments occur. An empty string is returned when there is no input or
        no segment was detected.
    """
    if not filepath:
        # Nothing was recorded or uploaded; there is nothing to classify.
        return ''

    predictions = []
    for audio in process_audio(filepath):
        # Only the first entry of the pipeline output is used.
        label = pipe(audio)[0]['label']
        # Show the raw label rather than crash if the model emits a label
        # that is missing from the lookup table.
        predictions.append(arabic_representation.get(label, label))
    return ' '.join(predictions)
# Outer container that hosts the tabbed UI.
demo = gr.Blocks()

# Tab 1: classify audio recorded from the microphone.
mic_classification = gr.Interface(
    fn=classify_utterances,
    inputs=gr.Audio(sources='microphone', type='filepath'),
    outputs=gr.Textbox(),
)

# Tab 2: classify an uploaded audio file.
file_classification = gr.Interface(
    fn=classify_utterances,
    inputs=gr.Audio(sources='upload', type='filepath'),
    outputs=gr.Textbox(),
)

with demo:
    gr.TabbedInterface(
        interface_list=[mic_classification, file_classification],
        tab_names=['Classify Microphone', 'Classify Audio File'],
    )

# Start the web server for the app.
demo.launch()

requirements.txt ADDED Viewed

	@@ -0,0 +1,202 @@

+absl-py==2.0.0
+accelerate==0.30.1
+aiofiles==23.2.1
+aiohttp==3.8.1
+aiosignal==1.3.1
+altair==5.3.0
+annotated-types==0.7.0
+anyio==4.4.0
+appdirs==1.4.4
+asttokens==2.1.0
+astunparse==1.6.3
+async-timeout==4.0.3
+attrs==23.2.0
+audioread==3.0.1
+backcall==0.2.0
+Brotli==1.0.9
+cachetools==5.3.2
+certifi==2024.6.2
+cffi==1.16.0
+charset-normalizer==2.0.4
+click==8.1.7
+colorama==0.4.6
+comm==0.2.1
+contourpy==1.2.0
+cycler==0.12.1
+datasets==2.19.1
+debugpy==1.6.3
+decorator==5.1.1
+dill==0.3.8
+dm-tree==0.1.8
+dnspython==2.6.1
+docker-pycreds==0.4.0
+email_validator==2.2.0
+entrypoints==0.4
+evaluate==0.4.2
+exceptiongroup==1.2.0
+executing==1.2.0
+fastapi==0.111.0
+fastapi-cli==0.0.4
+ffmpy==0.3.0
+filelock==3.13.1
+flatbuffers==23.5.26
+fonttools==4.46.0
+frozenlist==1.4.0
+fsspec==2024.3.1
+gast==0.5.4
+gevent==23.9.0.post1
+gitdb==4.0.7
+GitPython==3.1.37
+gmpy2==2.1.2
+google-auth==2.23.4
+google-auth-oauthlib==1.1.0
+google-pasta==0.2.0
+gradio==4.37.2
+gradio_client==1.0.2
+greenlet==3.0.3
+grpcio==1.59.3
+h11==0.14.0
+h2==4.1.0
+h5py==3.11.0
+hpack==4.0.0
+httpcore==1.0.5
+httpx==0.27.0
+huggingface_hub==0.23.0
+hyperframe==6.0.1
+idna==3.7
+importlib_metadata==7.1.0
+importlib_resources==6.4.0
+ipykernel==6.17.0
+ipython==8.6.0
+jedi==0.18.1
+Jinja2==3.1.3
+jiwer==3.0.4
+joblib==1.4.2
+jsonschema==4.22.0
+jsonschema-specifications==2023.12.1
+jupyter_client==7.4.4
+jupyter_core==4.11.2
+keras==3.0.1
+kiwisolver==1.4.5
+lazy_loader==0.4
+libclang==16.0.6
+librosa==0.10.2.post1
+llvmlite==0.42.0
+Markdown==3.6
+markdown-it-py==3.0.0
+MarkupSafe==2.1.3
+matplotlib==3.8.2
+matplotlib-inline==0.1.6
+mdurl==0.1.2
+ml-dtypes==0.3.2
+mpmath==1.3.0
+msgpack==1.0.8
+multidict==6.0.4
+multiprocess==0.70.13
+munkres==1.1.4
+namex==0.0.7
+nest-asyncio==1.5.6
+networkx==3.1
+noisereduce==3.0.2
+numba==0.59.1
+numpy==1.23.1
+oauthlib==3.2.2
+opt-einsum==3.3.0
+orjson==3.10.4
+packaging==23.2
+pandas==1.4.4
+parso==0.8.3
+pathtools==0.1.2
+patsy==0.5.6
+pickleshare==0.7.5
+pillow==10.2.0
+pip==24.0
+pkgutil_resolve_name==1.3.10
+platformdirs==3.10.0
+pooch==1.8.1
+prompt-toolkit==3.0.32
+protobuf==4.23.4
+psutil==5.9.3
+pure-eval==0.2.2
+pyarrow==14.0.2
+pyarrow-hotfix==0.6
+pycparser==2.22
+pydantic==2.8.0
+pydantic_core==2.20.0
+pydub==0.25.1
+Pygments==2.13.0
+pyparsing==3.0.9
+PySastrawi==1.2.0
+PySocks==1.7.1
+python-dateutil==2.8.2
+python-multipart==0.0.9
+python-version==0.0.2
+pytz==2024.1
+pywin32==304; sys_platform == "win32"
+PyYAML==6.0.1
+pyzmq==24.0.1
+rapidfuzz==3.9.1
+referencing==0.35.1
+regex==2023.10.3
+requests==2.31.0
+requests-oauthlib==1.3.1
+rich==13.7.0
+rpds-py==0.18.1
+rsa==4.9
+ruff==0.5.0
+safetensors==0.4.2
+scikit-learn==1.5.0
+scipy==1.13.1
+seaborn==0.13.2
+semantic-version==2.10.0
+sentry-sdk==1.9.0
+setproctitle==1.2.2
+setuptools==69.5.1
+shellingham==1.5.4
+six==1.16.0
+smmap==4.0.0
+sniffio==1.3.1
+soundfile==0.12.1
+soxr==0.3.7
+stack-data==0.6.0
+starlette==0.37.2
+statsmodels==0.14.2
+sympy==1.12
+tensorboard==2.16.2
+tensorboard-data-server==0.7.2
+tensorflow==2.16.1
+tensorflow-estimator==2.15.0
+tensorflow-intel==2.16.1; sys_platform == "win32"
+tensorflow-io-gcs-filesystem==0.31.0
+termcolor==2.3.0
+tf_keras==2.16.0
+threadpoolctl==3.5.0
+tokenizers==0.15.1
+tomlkit==0.12.0
+toolz==0.12.1
+torch==2.3.0
+tornado==6.2
+tqdm==4.66.4
+traitlets==5.5.0
+transformers==4.38.2
+typer==0.12.3
+typer-slim==0.12.3
+typing_extensions==4.11.0
+ujson==5.10.0
+unicodedata2==15.1.0
+urllib3==2.2.1
+uvicorn==0.30.1
+wandb==0.16.5
+wcwidth==0.2.5
+websockets==11.0.3
+Werkzeug==3.0.3
+wheel==0.43.0
+win-inet-pton==1.1.0
+wordcloud==1.9.2
+wrapt==1.16.0
+xgboost==1.7.5
+xxhash==2.0.2
+yarl==1.7.2
+zipp==3.17.0
+zope.event==5.0
+zope.interface==6.4.post2