Spaces:

batra43pvd
/

bird-classifier

Running

App Files Community

batra43pvd committed 4 days ago

Commit

9022aca

verified ·

1 Parent(s): 2ded0c7

Update app.py

Browse files

Files changed (1) hide show

app.py +22 -19

app.py CHANGED Viewed

@@ -1,6 +1,5 @@
 # app.py
-# Phiên bản hoàn chỉnh, đã sửa lỗi đọc file audio và đồng bộ hóa phiên bản thư viện.
-# Đã thêm các dòng print để chẩn đoán dữ liệu.
 import os
 import joblib
@@ -10,8 +9,9 @@ from flask import Flask, request, jsonify, render_template
 from werkzeug.utils import secure_filename
 import traceback
-# --- Thư viện mới để đọc audio một cách mạnh mẽ ---
 from pydub import AudioSegment
 # --- Cấu hình TensorFlow và các thư viện AI ---
 os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
@@ -106,19 +106,17 @@ def _create_spectrogram_image(y, sr):
         print(f"Lỗi tạo ảnh spectrogram: {e}")
         return np.zeros(SPECTROGRAM_SHAPE)
-# --- HÀM XỬ LÝ AUDIO ĐÃ ĐƯỢC CẬP NHẬT ---
 def process_audio_file(file_path):
     """
-    Hàm tổng hợp phiên bản mới: Dùng pydub để đọc file audio một cách mạnh mẽ,
     sau đó chuyển đổi sang định dạng mà librosa có thể xử lý an toàn.
     """
     try:
-        # 1. Dùng pydub để mở file audio (hỗ trợ nhiều định dạng)
         audio = AudioSegment.from_file(file_path)
-        # 2. **BƯỚC MỚI: CHUẨN HÓA ÂM LƯỢNG**
-        # Chuẩn hóa âm lượng về một mức tiêu chuẩn (-20 dBFS).
-        # Điều này giúp giảm sự khác biệt về âm lượng giữa các bản ghi.
         target_dbfs = -20.0
         change_in_dbfs = target_dbfs - audio.dBFS
         audio = audio.apply_gain(change_in_dbfs)
@@ -128,20 +126,25 @@ def process_audio_file(file_path):
         audio = audio.set_frame_rate(SAMPLE_RATE)
         # 4. Chuyển đổi audio của pydub thành mảng NumPy cho librosa
-        # Chuẩn hóa về khoảng [-1, 1]
         samples = np.array(audio.get_array_of_samples()).astype(np.float32)
         y = samples / (2**(audio.sample_width * 8 - 1))
-        # 5. Chuẩn hóa độ dài audio về MAX_SAMPLES
-        if len(y) > MAX_SAMPLES:
-            y = y[:MAX_SAMPLES]
         else:
-            y = np.pad(y, (0, MAX_SAMPLES - len(y)), mode='constant')
-        # 6. Trích xuất đồng thời các bộ đặc trưng (code này không đổi)
-        traditional_features = _extract_traditional_features(y, SAMPLE_RATE)
-        wav2vec_features = _extract_wav2vec_features(y, SAMPLE_RATE)
-        spectrogram = _create_spectrogram_image(y, SAMPLE_RATE)
         return traditional_features, wav2vec_features, spectrogram
@@ -178,7 +181,7 @@ def predict():
              return jsonify({'error': 'Không thể xử lý file audio.'}), 500
         # ========== BẮT ĐẦU PHẦN GHI LOG CHẨN ĐOÁN ==========
-        print("\n--- BẮT ĐẦU CHẨN ĐOÁN DỮ LIỆU ĐẦU VÀO ---")
         print(f"DEBUG | trad_feats stats: mean={np.mean(trad_feats):.2f}, std={np.std(trad_feats):.2f}, min={np.min(trad_feats):.2f}, max={np.max(trad_feats):.2f}")
         print(f"DEBUG | w2v_feats stats:  mean={np.mean(w2v_feats):.2f}, std={np.std(w2v_feats):.2f}, min={np.min(w2v_feats):.2f}, max={np.max(w2v_feats):.2f}")
         print(f"DEBUG | spec_img stats:   mean={np.mean(spec_img):.2f}, std={np.std(spec_img):.2f}, min={np.min(spec_img):.2f}, max={np.max(spec_img):.2f}")

 # app.py
+# Phiên bản hoàn chỉnh, đã thêm bước giảm nhiễu để cải thiện độ chính xác.
 import os
 import joblib
 from werkzeug.utils import secure_filename
 import traceback
+# --- Thư viện mới để đọc audio và giảm nhiễu ---
 from pydub import AudioSegment
+import noisereduce as nr
 # --- Cấu hình TensorFlow và các thư viện AI ---
 os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
         print(f"Lỗi tạo ảnh spectrogram: {e}")
         return np.zeros(SPECTROGRAM_SHAPE)
+# --- HÀM XỬ LÝ AUDIO ĐÃ ĐƯỢC CẬP NHẬT VỚI BƯỚC GIẢM NHIỄU ---
 def process_audio_file(file_path):
     """
+    Hàm tổng hợp phiên bản mới: Dùng pydub, chuẩn hóa âm lượng, giảm nhiễu,
     sau đó chuyển đổi sang định dạng mà librosa có thể xử lý an toàn.
     """
     try:
+        # 1. Dùng pydub để mở file audio
         audio = AudioSegment.from_file(file_path)
+        # 2. Chuẩn hóa âm lượng về một mức tiêu chuẩn
         target_dbfs = -20.0
         change_in_dbfs = target_dbfs - audio.dBFS
         audio = audio.apply_gain(change_in_dbfs)
         audio = audio.set_frame_rate(SAMPLE_RATE)
         # 4. Chuyển đổi audio của pydub thành mảng NumPy cho librosa
         samples = np.array(audio.get_array_of_samples()).astype(np.float32)
         y = samples / (2**(audio.sample_width * 8 - 1))
+        # 5. **BƯỚC MỚI: GIẢM NHIỄU (NOISE REDUCTION)**
+        # Thực hiện giảm nhiễu trên tín hiệu âm thanh
+        print("DEBUG | Bắt đầu giảm nhiễu...")
+        y_reduced_noise = nr.reduce_noise(y=y, sr=SAMPLE_RATE, prop_decrease=0.8)
+        print("DEBUG | Giảm nhiễu hoàn tất.")
+        # 6. Chuẩn hóa độ dài audio về MAX_SAMPLES (sử dụng tín hiệu đã giảm nhiễu)
+        if len(y_reduced_noise) > MAX_SAMPLES:
+            y_final = y_reduced_noise[:MAX_SAMPLES]
         else:
+            y_final = np.pad(y_reduced_noise, (0, MAX_SAMPLES - len(y_reduced_noise)), mode='constant')
+        # 7. Trích xuất đồng thời các bộ đặc trưng (dùng tín hiệu cuối cùng)
+        traditional_features = _extract_traditional_features(y_final, SAMPLE_RATE)
+        wav2vec_features = _extract_wav2vec_features(y_final, SAMPLE_RATE)
+        spectrogram = _create_spectrogram_image(y_final, SAMPLE_RATE)
         return traditional_features, wav2vec_features, spectrogram
              return jsonify({'error': 'Không thể xử lý file audio.'}), 500
         # ========== BẮT ĐẦU PHẦN GHI LOG CHẨN ĐOÁN ==========
+        print("\n--- BẮT ĐẦU CHẨN ĐOÁN DỮ LIỆU ĐẦU VÀO (SAU KHI GIẢM NHIỄU) ---")
         print(f"DEBUG | trad_feats stats: mean={np.mean(trad_feats):.2f}, std={np.std(trad_feats):.2f}, min={np.min(trad_feats):.2f}, max={np.max(trad_feats):.2f}")
         print(f"DEBUG | w2v_feats stats:  mean={np.mean(w2v_feats):.2f}, std={np.std(w2v_feats):.2f}, min={np.min(w2v_feats):.2f}, max={np.max(w2v_feats):.2f}")
         print(f"DEBUG | spec_img stats:   mean={np.mean(spec_img):.2f}, std={np.std(spec_img):.2f}, min={np.min(spec_img):.2f}, max={np.max(spec_img):.2f}")