Spaces:

ZiyuG
/

SignLanguage

Running on A10G

App Files Files Community

ZiyuG committed on Sep 17

Commit

cd92698

•

1 Parent(s): f08d9ef

Update evaluate.py

Browse files

Files changed (1) hide show

evaluate.py +42 -28

evaluate.py CHANGED Viewed

@@ -4,7 +4,6 @@ import numpy as np
 from sklearn.preprocessing import Normalizer
 from align import align_filter
 def merge_intervals_with_breaks(time_intervals, errors, max_break=1.5):
     print(f"时间区间: {time_intervals}")
     print(f"错误: {errors}")
@@ -45,6 +44,33 @@ def findcos_single(k1, k2):
     cosine_similarity = a / (np.sqrt(b) * np.sqrt(c))
     return 100 * (1 - (1 - cosine_similarity) / 2), 0
 def findCosineSimilarity_1(keypoints1, keypoints2):
     # transformer = Normalizer().fit(keypoints1)
@@ -91,7 +117,7 @@ def eval(test, standard, tmpdir):
     frame_width = int(cap_00.get(cv2.CAP_PROP_FRAME_WIDTH))
     frame_height = int(cap_00.get(cv2.CAP_PROP_FRAME_HEIGHT))
-    out = cv2.VideoWriter(tmpdir + '/output.mp4', cv2.VideoWriter_fourcc(*'H264'), 5, (frame_width*2, frame_height*2))
     cap_00.set(cv2.CAP_PROP_POS_FRAMES, 0) # 初始化视频从头开始读取
     cap_01.set(cv2.CAP_PROP_POS_FRAMES, 0)
@@ -113,12 +139,15 @@ def eval(test, standard, tmpdir):
         elif not ret_00 and not ret_01:
             comments = 2
             break
-        combined_frame_ori = np.hstack((frame_00, frame_01))
         # 获取视频当前的帧号
         frame_id_00 = int(cap_00.get(cv2.CAP_PROP_POS_FRAMES))
         frame_id_01 = int(cap_01.get(cv2.CAP_PROP_POS_FRAMES))
         # 处理标准视频中的关键点，并绘制关键点连接
         if frame_id_00 < min_length:
             keypoints_00 = data_00[frame_id_00]["instances"][0]["keypoints"]
@@ -151,7 +180,7 @@ def eval(test, standard, tmpdir):
         if frame_id_01 < min_length:
             error = []
             bigerror = []
-            keypoints_01 = data_01[frame_id_01]["instances"][0]["keypoints"]
             for (start, end) in connections1:
                 start = start - 1
@@ -196,7 +225,8 @@ def eval(test, standard, tmpdir):
                 cv2.circle(frame_01, (int(point[0]), int(point[1])), 1, (0, 210, 0), -1)
         # Concatenate the images horizontally to display side by side
-        combined_frame = np.hstack((frame_00, frame_01))
         if frame_id_00 < min_length and frame_id_01 < min_length:
             min_cos, min_idx = findCosineSimilarity_1(data_00[frame_id_00]["instances"][0]["keypoints"], data_01[frame_id_01]["instances"][0]["keypoints"])
@@ -213,7 +243,7 @@ def eval(test, standard, tmpdir):
             part = ""
             # 在视频帧上显示检测到的误差部位
-            cv2.putText(combined_frame, "Please check: ", (int(frame_width*1.75), int(frame_height*0.2)), cv2.FONT_HERSHEY_SIMPLEX, 1.2, (0, 0, 255), 2)
             start_x = int(frame_width*1.75) + 10   #435 # 起始的 x 坐标
             start_y = int(frame_height*0.2) + 50 # 45
             line_height = 50 # 每一行文字的高度
@@ -222,7 +252,7 @@ def eval(test, standard, tmpdir):
             for i, item in enumerate(list(set(content))):
                 text = "- " + item
                 y_position = start_y + i * line_height
-                cv2.putText(combined_frame, text, (start_x, y_position), cv2.FONT_HERSHEY_SIMPLEX, 1.2, (0, 0, 255), 2)
         # big
         if bigerror != []:
@@ -255,29 +285,12 @@ def eval(test, standard, tmpdir):
     return sum(scores) / len(scores), final_merged_intervals, comments
 def install():
-    # if torch.cuda.is_available():
-    #     cu_version = torch.version.cuda
-    #     cu_version = f"cu{cu_version.replace('.', '')}"  # Format it as 'cuXX' (e.g., 'cu113')
-    # else:
-    #     cu_version = "cpu"  # Fallback to CPU if no CUDA is available
-    # torch_version = torch.__version__.split('+')[0]  # Get PyTorch version without build info
-    # pip_command = f'pip install mmcv-full -f https://download.openmmlab.com/mmcv/dist/{cu_version}/{torch_version}/index.html'
-    # os.system(pip_command)
     import subprocess
     subprocess.run(["pip", "uninstall", "-y", "numpy"], check=True)
-    subprocess.run(["pip", "install", "numpy<2"], check=True)
     os.system('mim install mmengine')
-    # os.system('mim install "mmcv"')
-    # os.system('mim install "mmdet"')
-    # os.system('mim install "mmpose"')
-    # os.system('pip3 install mmcv==2.2.0 -f https://download.openmmlab.com/mmcv/dist/cu121/torch2.4/index.html"')
-    # os.system('pip3 install mmcv==2.2.0 -f https://download.openmmlab.com/mmcv/dist/cu121/torch2.4/index.html')
     os.system('git clone https://github.com/open-mmlab/mmpose.git')
     os.chdir('mmpose')
     os.system('pip install -r requirements.txt')
@@ -289,4 +302,5 @@ def install():
     os.chdir('mmdetection')
     os.system('pip install -v -e .')
     os.chdir('../')
-    # os.system('mim install "mmpose>=1.1.0"')

 from sklearn.preprocessing import Normalizer
 from align import align_filter
 def merge_intervals_with_breaks(time_intervals, errors, max_break=1.5):
     print(f"时间区间: {time_intervals}")
     print(f"错误: {errors}")
     cosine_similarity = a / (np.sqrt(b) * np.sqrt(c))
     return 100 * (1 - (1 - cosine_similarity) / 2), 0
def align_hstack(frame_00, frame_01, keypoints_01=None):
    """Place two frames side by side, matching frame_01's height to frame_00's.

    If the frame heights differ, frame_01 is resized (aspect ratio kept) so
    that its height equals frame_00's before the horizontal stack. When
    keypoints for frame_01 are supplied, their x/y coordinates are multiplied
    by the same height ratio.

    Args:
        frame_00: Reference image array; its ``shape[0]`` is the target height.
        frame_01: Image array to be resized (if needed) and stacked on the right.
        keypoints_01: Optional iterable of points, each indexable with x at
            ``[0]`` and y at ``[1]``; any further entries of a point are dropped.

    Returns:
        A tuple ``(combined_frame, scaled_keypoints)``. ``scaled_keypoints`` is
        ``None`` when ``keypoints_01`` is ``None``; otherwise it is a list of
        ``[x, y]`` pairs.
    """
    height_00 = frame_00.shape[0]
    height_01 = frame_01.shape[0]

    if height_01 != height_00:
        # One ratio drives both the image resize and the keypoint scaling.
        scale_factor = height_00 / height_01
        new_width = int(frame_01.shape[1] * scale_factor)
        # The size tuple is passed to cv2.resize as (width, height).
        frame_01_resized = cv2.resize(frame_01, (new_width, height_00))
    else:
        scale_factor = 1.0
        frame_01_resized = frame_01

    # Heights now agree, so the arrays can be concatenated horizontally.
    combined_frame_ori = np.hstack((frame_00, frame_01_resized))

    # Identity check: `== None` on a NumPy array compares element-wise and
    # makes the `if` raise "truth value of an array is ambiguous".
    if keypoints_01 is None:
        return combined_frame_ori, None

    # Only the x and y coordinates are scaled and kept.
    keypoints_01_scaled = [
        [point[0] * scale_factor, point[1] * scale_factor]
        for point in keypoints_01
    ]
    return combined_frame_ori, keypoints_01_scaled
 def findCosineSimilarity_1(keypoints1, keypoints2):
     # transformer = Normalizer().fit(keypoints1)
     frame_width = int(cap_00.get(cv2.CAP_PROP_FRAME_WIDTH))
     frame_height = int(cap_00.get(cv2.CAP_PROP_FRAME_HEIGHT))
+    out = cv2.VideoWriter(tmpdir + '/output.mp4', cv2.VideoWriter_fourcc(*'XVID'), 5, (frame_width*2, frame_height*2))
     cap_00.set(cv2.CAP_PROP_POS_FRAMES, 0) # 初始化视频从头开始读取
     cap_01.set(cv2.CAP_PROP_POS_FRAMES, 0)
         elif not ret_00 and not ret_01:
             comments = 2
             break
+        # combined_frame_ori = np.hstack((frame_00, frame_01))
         # 获取视频当前的帧号
         frame_id_00 = int(cap_00.get(cv2.CAP_PROP_POS_FRAMES))
         frame_id_01 = int(cap_01.get(cv2.CAP_PROP_POS_FRAMES))
+        if frame_id_01 < min_length:
+            combined_frame_ori, keypoints_01_scaled = align_hstack(frame_00, frame_01, data_01[frame_id_01]["instances"][0]["keypoints"])
+        else:
+            combined_frame_ori, _ = align_hstack(frame_00, frame_01)
         # 处理标准视频中的关键点，并绘制关键点连接
         if frame_id_00 < min_length:
             keypoints_00 = data_00[frame_id_00]["instances"][0]["keypoints"]
         if frame_id_01 < min_length:
             error = []
             bigerror = []
+            keypoints_01 = keypoints_01_scaled #data_01[frame_id_01]["instances"][0]["keypoints"]
             for (start, end) in connections1:
                 start = start - 1
                 cv2.circle(frame_01, (int(point[0]), int(point[1])), 1, (0, 210, 0), -1)
         # Concatenate the images horizontally to display side by side
+        # combined_frame = np.hstack((frame_00, frame_01))
+        combined_frame, _ = align_hstack(frame_00, frame_01)
         if frame_id_00 < min_length and frame_id_01 < min_length:
             min_cos, min_idx = findCosineSimilarity_1(data_00[frame_id_00]["instances"][0]["keypoints"], data_01[frame_id_01]["instances"][0]["keypoints"])
             part = ""
             # 在视频帧上显示检测到的误差部位
+            # cv2.putText(combined_frame, "Please check: ", (int(frame_width*1.75), int(frame_height*0.2)), cv2.FONT_HERSHEY_SIMPLEX, 1.2, (0, 0, 255), 2)
             start_x = int(frame_width*1.75) + 10   #435 # 起始的 x 坐标
             start_y = int(frame_height*0.2) + 50 # 45
             line_height = 50 # 每一行文字的高度
             for i, item in enumerate(list(set(content))):
                 text = "- " + item
                 y_position = start_y + i * line_height
+                # cv2.putText(combined_frame, text, (start_x, y_position), cv2.FONT_HERSHEY_SIMPLEX, 1.2, (0, 0, 255), 2)
         # big
         if bigerror != []:
     return sum(scores) / len(scores), final_merged_intervals, comments
 def install():
     import subprocess
     subprocess.run(["pip", "uninstall", "-y", "numpy"], check=True)
+    subprocess.run(["pip", "install", "numpy<2"], check=True)
     os.system('mim install mmengine')
+    os.system('mim install mmcv==2.2.0')
     os.system('git clone https://github.com/open-mmlab/mmpose.git')
     os.chdir('mmpose')
     os.system('pip install -r requirements.txt')
     os.chdir('mmdetection')
     os.system('pip install -v -e .')
     os.chdir('../')
+    os.system('apt-get install ffmpeg imagemagick')