anchorxia commited on
Commit
a57c6eb
1 Parent(s): f7d3f4d
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. MuseV/MMCM/.gitignore +139 -0
  2. MuseV/MMCM/Dockerfile +83 -0
  3. MuseV/MMCM/README.md +2 -0
  4. MuseV/MMCM/mmcm/__init__.py +6 -0
  5. MuseV/MMCM/mmcm/audio/__init__.py +0 -0
  6. MuseV/MMCM/mmcm/data/__init__.py +9 -0
  7. MuseV/MMCM/mmcm/data/clip.py +324 -0
  8. MuseV/MMCM/mmcm/data/clip/__init__.py +5 -0
  9. MuseV/MMCM/mmcm/data/clip/clip.py +197 -0
  10. MuseV/MMCM/mmcm/data/clip/clip_filter.py +46 -0
  11. MuseV/MMCM/mmcm/data/clip/clip_fusion.py +64 -0
  12. MuseV/MMCM/mmcm/data/clip/clip_process.py +366 -0
  13. MuseV/MMCM/mmcm/data/clip/clip_stat.py +13 -0
  14. MuseV/MMCM/mmcm/data/clip/clipid.py +70 -0
  15. MuseV/MMCM/mmcm/data/crawl/__init__.py +0 -0
  16. MuseV/MMCM/mmcm/data/crawl/download.py +72 -0
  17. MuseV/MMCM/mmcm/data/crawl/error.py +20 -0
  18. MuseV/MMCM/mmcm/data/crawl/ffmpeg.py +39 -0
  19. MuseV/MMCM/mmcm/data/crawl/flicker.py +22 -0
  20. MuseV/MMCM/mmcm/data/crawl/youtube.py +13 -0
  21. MuseV/MMCM/mmcm/data/emb/__init__.py +2 -0
  22. MuseV/MMCM/mmcm/data/emb/emb.py +104 -0
  23. MuseV/MMCM/mmcm/data/emb/h5py_emb.py +119 -0
  24. MuseV/MMCM/mmcm/data/emb/json_emb.py +0 -0
  25. MuseV/MMCM/mmcm/data/emb/numpy_emb.py +0 -0
  26. MuseV/MMCM/mmcm/data/extract_feature/__init__.py +0 -0
  27. MuseV/MMCM/mmcm/data/extract_feature/base_extract_feature.py +28 -0
  28. MuseV/MMCM/mmcm/data/general/__init__.py +1 -0
  29. MuseV/MMCM/mmcm/data/general/items.py +69 -0
  30. MuseV/MMCM/mmcm/data/media_map/__init__.py +1 -0
  31. MuseV/MMCM/mmcm/data/media_map/media_map.py +393 -0
  32. MuseV/MMCM/mmcm/data/media_map/media_map_process.py +72 -0
  33. MuseV/MMCM/mmcm/music/__init__.py +6 -0
  34. MuseV/MMCM/mmcm/music/music_map/__init__.py +0 -0
  35. MuseV/MMCM/mmcm/music/music_map/beat_map.py +82 -0
  36. MuseV/MMCM/mmcm/music/music_map/clip_process.py +196 -0
  37. MuseV/MMCM/mmcm/music/music_map/convert_type.py +57 -0
  38. MuseV/MMCM/mmcm/music/music_map/load_music_map.py +38 -0
  39. MuseV/MMCM/mmcm/music/music_map/lyric_map.py +149 -0
  40. MuseV/MMCM/mmcm/music/music_map/lyric_process.py +515 -0
  41. MuseV/MMCM/mmcm/music/music_map/meta_info.py +21 -0
  42. MuseV/MMCM/mmcm/music/music_map/mss_map.py +185 -0
  43. MuseV/MMCM/mmcm/music/music_map/music_clip.py +83 -0
  44. MuseV/MMCM/mmcm/music/music_map/music_map.py +140 -0
  45. MuseV/MMCM/mmcm/music/music_map/music_map_demp.py +58 -0
  46. MuseV/MMCM/mmcm/music/utils/__init__.py +0 -0
  47. MuseV/MMCM/mmcm/music/utils/path_util.py +9 -0
  48. MuseV/MMCM/mmcm/t2p/.gitignore +158 -0
  49. MuseV/MMCM/mmcm/t2p/GPT_eval_multi.py +121 -0
  50. MuseV/MMCM/mmcm/t2p/LICENSE +201 -0
MuseV/MMCM/.gitignore ADDED
@@ -0,0 +1,139 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Byte-compiled / optimized / DLL files
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+
6
+ # C extensions
7
+ *.so
8
+
9
+ # Distribution / packaging
10
+ .Python
11
+ build/
12
+ develop-eggs/
13
+ dist/
14
+ downloads/
15
+ eggs/
16
+ .eggs/
17
+ lib/
18
+ lib64/
19
+ parts/
20
+ sdist/
21
+ var/
22
+ wheels/
23
+ pip-wheel-metadata/
24
+ share/python-wheels/
25
+ *.egg-info/
26
+ .installed.cfg
27
+ *.egg
28
+ MANIFEST
29
+
30
+ # PyInstaller
31
+ # Usually these files are written by a python script from a template
32
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
33
+ *.manifest
34
+ *.spec
35
+
36
+ # Installer logs
37
+ pip-log.txt
38
+ pip-delete-this-directory.txt
39
+
40
+ # Unit test / coverage reports
41
+ htmlcov/
42
+ .tox/
43
+ .nox/
44
+ .coverage
45
+ .coverage.*
46
+ .cache
47
+ nosetests.xml
48
+ coverage.xml
49
+ *.cover
50
+ .hypothesis/
51
+ .pytest_cache/
52
+
53
+ # Translations
54
+ *.mo
55
+ *.pot
56
+
57
+ # Django stuff:
58
+ *.log
59
+ local_settings.py
60
+ db.sqlite3
61
+
62
+ # Flask stuff:
63
+ instance/
64
+ .webassets-cache
65
+
66
+ # Scrapy stuff:
67
+ .scrapy
68
+
69
+ # Sphinx documentation
70
+ docs/_build/
71
+
72
+ # PyBuilder
73
+ target/
74
+
75
+ # Jupyter Notebook
76
+ .ipynb_checkpoints
77
+
78
+ # IPython
79
+ profile_default/
80
+ ipython_config.py
81
+
82
+ # pyenv
83
+ .python-version
84
+
85
+ # pipenv
86
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
87
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
88
+ # having no cross-platform support, pipenv may install dependencies that don’t work, or not
89
+ # install all needed dependencies.
90
+ #Pipfile.lock
91
+
92
+ # celery beat schedule file
93
+ celerybeat-schedule
94
+
95
+ # SageMath parsed files
96
+ *.sage.py
97
+
98
+ # Environments
99
+ .env
100
+ .venv
101
+ env/
102
+ venv/
103
+ ENV/
104
+ env.bak/
105
+ venv.bak/
106
+
107
+ # Spyder project settings
108
+ .spyderproject
109
+ .spyproject
110
+
111
+ # Rope project settings
112
+ .ropeproject
113
+
114
+ # mkdocs documentation
115
+ /site
116
+
117
+ # mypy
118
+ .mypy_cache/
119
+ .dmypy.json
120
+ dmypy.json
121
+
122
+ # Pyre type checker
123
+ .pyre/
124
+
125
+ *.swp
126
+ .*.swp
127
+ dataset/files
128
+ experiments
129
+ log
130
+ csvs
131
+
132
+ .idea
133
+ .vscode
134
+ __pycache__/
135
+ *.code-workspace
136
+ .DS_Store
137
+ third_party/
138
+ .polaris_cache/
139
+ *.lock
MuseV/MMCM/Dockerfile ADDED
@@ -0,0 +1,83 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # FROM mirrors.tencent.com/todacc/venus-std-base-cuda11.8:0.1.0
2
+ FROM mirrors.tencent.com/todacc/venus-std-ext-cuda11.8-pytorch2.0-tf2.12-py3.10:0.7.0
3
+
4
+ #MAINTAINER 维护者信息
5
+ LABEL MAINTAINER="anchorxia"
6
+ LABEL Email="[email protected]"
7
+ LABEL Description="gpu development image, from mirrors.tencent.com/todacc/venus-std-ext-cuda11.8-pytorch2.0-tf2.12-py3.10:0.7.0"
8
+
9
+ USER root
10
+ # 安装必须软件
11
+ # RUN GENERIC_REPO_URL="http://mirrors.tencent.com/repository/generic/venus_repo/image_res" \
12
+ # && cd /data/ \
13
+ # && wget -q $GENERIC_REPO_URL/gcc/gcc-11.2.0.zip \
14
+ # && unzip -q gcc-11.2.0.zip \
15
+ # && cd gcc-releases-gcc-11.2.0 \
16
+ # && ./contrib/download_prerequisites \
17
+ # && ./configure --enable-bootstrap --enable-languages=c,c++ --enable-threads=posix --enable-checking=release --enable-multilib --with-system-zlib \
18
+ # && make --silent -j10 \
19
+ # && make --silent install \
20
+ # && gcc -v \
21
+ # && rm -rf /data/gcc-releases-gcc-11.2.0 /data/gcc-11.2.0.zip
22
+
23
+ # RUN yum update -y \
24
+ # && yum install -y epel-release \
25
+ # && yum install -y ffmpeg \
26
+ # && yum install -y Xvfb \
27
+ # && yum install -y centos-release-scl devtoolset-11
28
+ RUN yum install -y wget zsh git curl tmux cmake htop iotop git-lfs zip \
29
+ && yum install -y autojump autojump-zsh portaudio portaudio-devel \
30
+ && yum clean all
31
+
32
+ USER mqq
33
+ RUN source ~/.bashrc \
34
+ && GENERIC_REPO_URL="http://mirrors.tencent.com/repository/generic/venus_repo/image_res" \
35
+ && conda deactivate \
36
+ # && conda remove -y -n env-2.7.18 --all \
37
+ # && conda remove -y -n env-3.6.8 --all \
38
+ # && conda remove -y -n env-3.7.7 --all \
39
+ # && conda remove -y -n env-3.8.8 --all \
40
+ # && conda remove -y -n env-3.9.2 --all \
41
+ # && conda remove -y -n env-novelai --all \
42
+ && conda create -n projectv python=3.10.6 -y \
43
+ && conda activate projectv \
44
+ && pip install venus-sdk -q -i https://mirrors.tencent.com/repository/pypi/tencent_pypi/simple \
45
+ --extra-index-url https://mirrors.tencent.com/pypi/simple/ \
46
+ && pip install tensorflow==2.12.0 tensorboard==2.12.0 \
47
+ && pip install torch==2.0.1+cu118 torchvision==0.15.2+cu118 -f https://mirror.sjtu.edu.cn/pytorch-wheels/torch_stable.html -i https://mirrors.bfsu.edu.cn/pypi/web/simple -U \
48
+ # 安装xformers,支持不同型号gpu
49
+ && pip install ninja==1.11.1 \
50
+ # && git clone https://github.com/facebookresearch/xformers.git \
51
+ # && cd xformers \
52
+ # && git checkout v0.0.17rc482 \
53
+ # && git submodule update --init --recursive \
54
+ # && pip install numpy==1.23.4 pyre-extensions==0.0.23 \
55
+ # && FORCE_CUDA="1" MAX_JOBS=1 TORCH_CUDA_ARCH_LIST="6.1;7.0;7.5;8.0;8.6" pip install -e . \
56
+ # && cd .. \
57
+ # 安装一堆包
58
+ && pip install --no-cache-dir transformers bitsandbytes decord accelerate xformers omegaconf einops imageio==2.31.1 \
59
+ && pip install --no-cache-dir pandas h5py matplotlib modelcards pynvml black pytest moviepy torch-tb-profiler scikit-learn librosa ffmpeg easydict webp controlnet_aux mediapipe \
60
+ && pip install --no-cache-dir Cython easydict gdown infomap insightface ipython librosa onnx onnxruntime onnxsim opencv_python Pillow protobuf pytube PyYAML \
61
+ && pip install --no-cache-dir requests scipy six tqdm gradio albumentations opencv-contrib-python imageio-ffmpeg pytorch-lightning test-tube \
62
+ && pip install --no-cache-dir timm addict yapf prettytable safetensors basicsr fvcore pycocotools wandb gunicorn \
63
+ && pip install --no-cache-dir streamlit webdataset kornia open_clip_torch streamlit-drawable-canvas torchmetrics \
64
+ # 安装暗水印
65
+ && pip install --no-cache-dir invisible-watermark==0.1.5 gdown==4.5.3 ftfy==6.1.1 modelcards==0.1.6 \
66
+ # 安装openmm相关包
67
+ && pip install --no-cache-dir -U openmim \
68
+ && mim install mmengine \
69
+ && mim install "mmcv>=2.0.1" \
70
+ && mim install "mmdet>=3.1.0" \
71
+ && mim install "mmpose>=1.1.0" \
72
+ # jupyters
73
+ && pip install ipywidgets==8.0.3 \
74
+ && python -m ipykernel install --user --name projectv --display-name "python(projectv)" \
75
+ && pip install --no-cache-dir matplotlib==3.6.2 redis==4.5.1 pydantic[dotenv]==1.10.2 loguru==0.6.0 IProgress==0.4 \
76
+ && pip install --no-cache-dir cos-python-sdk-v5==1.9.22 coscmd==1.8.6.30 \
77
+ # 必须放在最后pip,避免和jupyter的不兼容
78
+ && pip install --no-cache-dir markupsafe==2.0.1 \
79
+ && wget -P /tmp $GENERIC_REPO_URL/cpu/clean-layer.sh \
80
+ && sh /tmp/clean-layer.sh
81
+
82
+ ENV LD_LIBRARY_PATH=/usr/local/lib64:$LD_LIBRARY_PATH
83
+ USER root
MuseV/MMCM/README.md ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ # MMCM
2
+ Process package for multi media, cross multi modal.
MuseV/MMCM/mmcm/__init__.py ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ from .audio import *
2
+ from .data import *
3
+ from .music import *
4
+ from .text import *
5
+ from .vision import *
6
+ from .t2p import *
MuseV/MMCM/mmcm/audio/__init__.py ADDED
File without changes
MuseV/MMCM/mmcm/data/__init__.py ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ from .general.items import Items, Item
2
+
3
+ from .emb.emb import MediaMapEmb
4
+ from .emb.h5py_emb import H5pyMediaMapEmb, H5pyMediaMapEmbProxy
5
+
6
+ from .media_map.media_map import MediaMap, MetaInfo, MetaInfoList, MediaMapSeq
7
+ from .media_map.media_map_process import get_sub_mediamap_by_clip_idx, get_sub_mediamap_by_stage, get_subseq_by_time
8
+ from .clip.clip import Clip, ClipSeq
9
+ from .clip.clipid import ClipIds, ClipIdsSeq, MatchedClipIds, MatchedClipIdsSeq
MuseV/MMCM/mmcm/data/clip.py ADDED
@@ -0,0 +1,324 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from copy import deepcopy
2
+ from typing import Iterable
3
+ import logging
4
+
5
+ import numpy as np
6
+
7
+ from ..utils.util import convert_class_attr_to_dict
8
+
9
+ logger = logging.getLogger(__name__) # pylint: disable=invalid-name
10
+
11
+
12
class Clip(object):
    """A media clip: the segment between two transition points.

    NOTE(review): the original declared ``class Clip(object, Item)``, but
    ``Item`` is never imported in this module (NameError at class creation)
    and ``(object, Item)`` would be an inconsistent MRO anyway. The newer
    ``clip/clip.py`` defines ``Clip(Item)``; here the unresolved base is
    dropped so this legacy module stays importable.
    """

    def __init__(
        self,
        time_start,
        duration,
        clipid=None,
        media_type=None,
        mediaid=None,
        timepoint_type=None,
        text=None,
        stage=None,
        path=None,
        duration_num=None,
        group_time_start=0,
        group_clipid=None,
        original_clipid=None,
        emb=None,
        multi_factor=None,
        similar_clipseq=None,
        rythm: float = None,
        **kwargs
    ):
        """
        Args:
            time_start (float): start time in seconds within the media file;
                matches the index in media_map.json.
            duration (float): clip duration in seconds.
            clipid (int, or [int]): clip index provided by the media map;
                matches the index in media_map.json.
            media_type (str, optional): music, video or text. Defaults to None.
            mediaid (int): media id; a list-valued clipid marks a fused clip.
            timepoint_type (int, optional): transition type of the start
                point. Defaults to None.
            text (str, optional): textual description of the clip — lyrics
                for music, dialogue or even on-screen comments for video.
            stage (str, optional): structural position within the media,
                e.g. intro/chorus/verse for music; opening/ending/climax/
                transition for video. Defaults to None.
            path (str, optional): media file path for later reading and
                processing. Defaults to None.
            duration_num (int, optional): clip duration in frames.
            group_time_start (float, optional): when editing several songs or
                videos, the summed clip duration of all sub-media preceding
                the one this clip belongs to. 0 means a single media file.
                Defaults to 0.
            group_clipid (int, optional): actual index in
                MediaInfo.sub_meta_info.
            original_clipid (None or [int], optional): when this clip was
                produced by merging others, the source clip indices in
                media_map.json. Defaults to None (stored as []).
            emb (np.array, optional): aggregated clip embedding.
            multi_factor (MultiFactorFeature, optional): multi-facet features.
            similar_clipseq ([Clip], optional): clips similar to this one;
                exact structure still to be defined. Defaults to None.
            rythm (float, optional): rhythm score of the clip.
        """
        self.media_type = media_type
        self.mediaid = mediaid
        self.time_start = time_start
        self.duration = duration
        self.clipid = clipid
        self.path = path
        self.timepoint_type = timepoint_type
        self.text = text
        self.stage = stage
        self.group_time_start = group_time_start
        self.group_clipid = group_clipid
        self.duration_num = duration_num
        # Avoid a shared mutable default: None becomes a fresh list.
        self.original_clipid = original_clipid if original_clipid is not None else []
        self.emb = emb
        self.multi_factor = multi_factor
        self.similar_clipseq = similar_clipseq
        self.rythm = rythm
        # TODO: maps currently carry some unnecessary intermediate fields that
        # are memory-heavy; keep passing them through until the data protocol
        # is finalized.
        self.__dict__.update(kwargs)
        self.preprocess()

    def preprocess(self):
        # Hook for subclasses; no-op here.
        pass

    def spread_parameters(self):
        # Hook for subclasses; no-op here.
        pass

    @property
    def time_end(
        self,
    ):
        """End time in seconds: time_start + duration."""
        return self.time_start + self.duration

    @property
    def mvp_clip(self):
        """Load the actual clip data in moviepy format.

        Raises:
            NotImplementedError: must be provided by subclasses.
        """
        raise NotImplementedError
98
+
99
+
100
class ClipSeq(object):
    """A sequence of media clips."""

    # Class used to build clips when dicts are passed to __init__.
    ClipClass = Clip

    def __init__(self, clips) -> None:
        """Build a clip sequence.

        Args:
            clips (Clip, [Clip] or [dict]): clip sequence; a single clip is
                wrapped in a list, dicts are converted via ``ClipClass``.
        """
        if not isinstance(clips, list):
            clips = [clips]
        if len(clips) == 0:
            self.clips = []
        elif isinstance(clips[0], dict):
            self.clips = [self.ClipClass(**d) for d in clips]
        else:
            self.clips = clips

    def set_clip_value(self, k, v):
        """Assign attribute ``k`` = ``v`` on every clip in the sequence."""
        for i in range(len(self.clips)):
            self.clips[i].__setattr__(k, v)

    def __len__(
        self,
    ):
        return len(self.clips)

    def merge(self, other, group_time_start_delta=None, groupid_delta=None):
        """Merge another ClipSeq into this one. When media_info objects are
        merged, each clip tracks its groupid and group_time_start; the deltas
        describe how those shift for the incoming clips.

        Args:
            other (ClipSeq): sequence to merge in.
            group_time_start_delta (float, optional): added to each incoming
                clip's group_time_start. Defaults to None.
            groupid_delta (int, optional): added to each incoming clip's
                ``groupid``. Defaults to None.
                NOTE(review): ``Clip.__init__`` in this module does not set
                ``groupid`` — presumably assigned elsewhere; confirm.
        """
        if group_time_start_delta is not None or groupid_delta is not None:
            for i, clip in enumerate(other):
                if group_time_start_delta is not None:
                    clip.group_time_start += group_time_start_delta
                if groupid_delta is not None:
                    clip.groupid += groupid_delta
        self.clips.extend(other.clips)
        # Re-number group_clipid to match the new positions.
        for i in range(len(self.clips)):
            self.clips[i].group_clipid = i

    @property
    def duration(
        self,
    ):
        """Sum of Clip.duration over the sequence.

        Returns:
            float: total duration of the sequence.
        """
        if len(self.clips) == 0:
            return 0
        else:
            return sum([c.duration for c in self.clips])

    def __getitem__(self, i) -> Clip:
        """Index and slice access: an int yields a Clip; an iterable or a
        slice yields a new ClipSeq.

        Args:
            i (int, Iterable or slice): index

        Raises:
            ValueError: for unsupported index types

        Returns:
            Clip or ClipSeq:
        """
        # Coerce numpy integer scalars ("int64" etc.) to a plain int first.
        if "int" in str(type(i)):
            i = int(i)
        if isinstance(i, int):
            clip = self.clips[i]
            return clip
        elif isinstance(i, Iterable):
            clips = [self.__getitem__(x) for x in i]
            clipseq = ClipSeq(clips)
            return clipseq
        elif isinstance(i, slice):
            # NOTE(review): a slice with start/stop of None would break
            # range(); only fully-specified slices are supported here.
            if i.step is None:
                step = 1
            else:
                step = i.step
            clips = [self.__getitem__(x) for x in range(i.start, i.stop, step)]
            clipseq = ClipSeq(clips)
            return clipseq
        else:
            raise ValueError(
                "unsupported input, should be int or slice, but given {}, type={}".format(
                    i, type(i)
                )
            )

    def insert(self, idx, obj):
        """Insert a clip at position ``idx``."""
        self.clips.insert(idx, obj)

    def append(self, obj):
        """Append a clip to the sequence."""
        self.clips.append(obj)

    def extend(self, objs):
        """Extend the sequence with an iterable of clips."""
        self.clips.extend(objs)

    @property
    def duration_seq_emb(
        self,
    ):
        # 1-D array of per-clip durations.
        emb = np.array([c.duration for c in self.clips])
        return emb

    @property
    def timestamp_seq_emb(self):
        # 1-D array of per-clip start times.
        emb = np.array([c.time_start for c in self.clips])
        return emb

    @property
    def rela_timestamp_seq_emb(self):
        # Start times normalized by the total duration.
        emb = self.timestamp_seq_emb / self.duration
        return emb

    def get_factor_seq_emb(self, factor, dim):
        """Stack each clip's ``multi_factor[factor]`` vector into an (N, dim)
        array; clips missing the factor contribute a +inf-filled vector."""
        emb = []
        for c in self.clips:
            if factor not in c.multi_factor or c.multi_factor[factor] is None:
                v = np.full(dim, np.inf)
            else:
                v = c.multi_factor[factor]
            emb.append(v)
        emb = np.stack(emb, axis=0)
        return emb

    def semantic_seq_emb(self, dim):
        """Per-clip semantic feature matrix."""
        return self.get_factor_seq_emb(factor="semantics", dim=dim)

    def emotion_seq_emb(self, dim):
        """Per-clip emotion feature matrix."""
        return self.get_factor_seq_emb(factor="emotion", dim=dim)

    def theme_seq_emb(self, dim):
        """Per-clip theme feature matrix."""
        return self.get_factor_seq_emb(factor="theme", dim=dim)

    def to_dct(
        self,
        target_keys=None,
        ignored_keys=None,
    ):
        """Serialize each clip to a dict via ``clip.to_dct``.

        NOTE(review): relies on the clip class providing ``to_dct``; the
        Clip defined in this module does not — presumably inherited in the
        newer Item-based Clip; confirm before use.
        """
        if ignored_keys is None:
            ignored_keys = ["kwargs", "audio_path", "lyric_path", "start", "end"]
        clips = [
            clip.to_dct(target_keys=target_keys, ignored_keys=ignored_keys)
            for clip in self.clips
        ]
        return clips

    @property
    def mvp_clip(self):
        """Load the actual clip data in moviepy format.

        Raises:
            NotImplementedError: must be provided by subclasses.
        """
        raise NotImplementedError
265
+
266
+
267
class ClipIds(object):
    """Indices into a ClipSeq, used mainly for clips fused from several
    clips — e.g. one MusicClip matched against several VideoClips, whose
    indices are described by a single ClipIds.
    """

    def __init__(
        self,
        clipids: list or int,
    ) -> None:
        """
        Args:
            clipids (list or int): index (or indices) into the ClipSeq; a
                single int is stored as a one-element list.
        """
        if isinstance(clipids, list):
            self.clipids = clipids
        else:
            self.clipids = [clipids]
279
+
280
+
281
class ClipIdsSeq(object):
    """A list of ClipIds, e.g. for regrouping a MediaClipSeq into a
    coarser-grained ClipSeq."""

    def __init__(self, clipids_seq: list) -> None:
        """
        Args:
            clipids_seq (list): list of grouped ClipIds; a single ClipIds is
                wrapped in a list.

        Note:
            The original tested ``isinstance(clipids_seq, ClipIds)`` and so
            wrapped *list* inputs (nesting them one level deep) while leaving
            a bare ClipIds unwrapped — the inverse of the documented intent.
            Wrapping non-list inputs restores it.
        """
        self.clipids_seq = (
            clipids_seq if isinstance(clipids_seq, list) else [clipids_seq]
        )
292
+
293
+
294
# TODO: metric may become a dict later
class MatchedClipIds(object):
    def __init__(
        self, id1: ClipIds, id2: ClipIds, metric: float = None, **kwargs
    ) -> None:
        """A matched pair of clip fragments from two modalities — e.g. the
        match between a music clip and a video clip.

        Args:
            id1 (ClipIds): fragment of the first modality; bare values are
                wrapped into ClipIds.
            id2 (ClipIds): fragment of the second modality; bare values are
                wrapped into ClipIds.
            metric (float): matching distance of the pair.
        """
        self.id1 = id1 if isinstance(id1, ClipIds) else ClipIds(id1)
        self.id2 = id2 if isinstance(id2, ClipIds) else ClipIds(id2)
        self.metric = metric
        # Extra fields become plain attributes.
        self.__dict__.update(**kwargs)
311
+
312
+
313
class MatchedClipIdsSeq(object):
    """A matched pair of sequences from two modalities — e.g. a music clip
    sequence matched against a video clip sequence, where each element is a
    MatchedClipIds.
    """

    def __init__(self, seq: list, metric: float = None, **kwargs) -> None:
        """
        Args:
            seq (list): list of matched pairs between the two modalities.
            metric (float): matching distance of the whole sequence.
        """
        self.metric = metric
        self.seq = seq
        for key, value in kwargs.items():
            setattr(self, key, value)
MuseV/MMCM/mmcm/data/clip/__init__.py ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ from .clip import Clip, ClipSeq
2
+ from .clipid import ClipIds, MatchedClipIds, ClipIdsSeq, MatchedClipIdsSeq
3
+ from .clip_process import find_idx_by_time, find_idx_by_clip, get_subseq_by_time, get_subseq_by_idx, clip_is_top, clip_is_middle, clip_is_end, abadon_old_return_new, reset_clipseq_id, insert_endclip, insert_startclip, drop_start_end_by_time, complete_clipseq, complete_gap
4
+ from .clip_stat import stat_clipseq_duration
5
+ from .clip_filter import ClipFilter, ClipSeqFilter
MuseV/MMCM/mmcm/data/clip/clip.py ADDED
@@ -0,0 +1,197 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+ from copy import deepcopy
3
+
4
+ from typing import Iterable, List, Tuple, Dict, Hashable, Any, Union
5
+
6
+ import numpy as np
7
+
8
+ from ...utils.util import convert_class_attr_to_dict
9
+
10
+
11
+ from ..general.items import Items, Item
12
+ from .clipid import MatchedClipIds
13
+
14
+
15
+ import logging
16
+
17
+ logger = logging.getLogger(__name__) # pylint: disable=invalid-name
18
+
19
+
20
+ __all__ = ["Clip", "ClipSeq"]
21
+
22
+
23
class Clip(Item):
    """A media clip: the segment between two transition points."""

    def __init__(
        self,
        time_start: float,
        duration: float,
        clipid: int = None,
        media_type: str = None,
        mediaid: str = None,
        timepoint_type: str = None,
        text: str = None,
        stage: str = None,
        path: str = None,
        duration_num: int = None,
        similar_clipseq: MatchedClipIds = None,
        dynamic: float = None,
        **kwargs,
    ):
        """
        Args:
            time_start (float): start time in seconds within the media file;
                matches the index in media_map.json.
            duration (float): clip duration in seconds.
            clipid (int, or [int]): clip index from the media map; a list
                marks a fused clip.
            media_type (str, optional): music, video or text. Defaults to None.
            mediaid (str): media id.
            timepoint_type (str, optional): transition type of the start
                point. Defaults to None.
            text (str, optional): textual description — lyrics for music,
                dialogue or on-screen comments for video. Defaults to None.
            stage (str, optional): structural position within the media,
                e.g. intro/chorus/verse for music; opening/ending/climax/
                transition for video. Defaults to None.
            path (str, optional): media file path for later reading and
                processing. Defaults to None.
            duration_num (int, optional): clip duration in frames.
            similar_clipseq (MatchedClipIds, optional): clips similar to this
                one; exact structure still to be defined. Defaults to None.
            dynamic (float, optional): dynamics score of the clip.
        """
        self.media_type = media_type
        self.mediaid = mediaid
        self.time_start = time_start
        self.duration = duration
        self.clipid = clipid
        self.path = path
        self.timepoint_type = timepoint_type
        self.text = text
        self.stage = stage
        self.duration_num = duration_num
        self.similar_clipseq = similar_clipseq
        self.dynamic = dynamic
        # Extra map fields become plain attributes.
        self.__dict__.update(**kwargs)

    def preprocess(self):
        # Hook for subclasses; no-op here.
        pass

    def spread_parameters(self):
        # Hook for subclasses; no-op here.
        pass

    @property
    def time_end(
        self,
    ) -> float:
        """End time in seconds: time_start + duration."""
        return self.time_start + self.duration

    def get_emb(self, key: str, idx: int) -> float:
        # Annotation fixed from ``np.float`` (alias removed in NumPy >= 1.24);
        # lazy under the module's ``from __future__ import annotations``.
        # NOTE(review): relies on ``self.emb`` being attached externally —
        # it is not set in __init__; confirm against the owning media map.
        return self.emb.get_value(key, idx)
84
+
85
+
86
class ClipSeq(Items):
    """A sequence of media clips."""

    def __init__(self, items: List[Clip] = None):
        super().__init__(items)
        # Alias: Items stores its payload in ``self.data``.
        self.clipseq = self.data

    def preprocess(self):
        # Hook for subclasses; no-op here.
        pass

    def set_clip_value(self, k: Hashable, v: Any) -> None:
        """Assign attribute ``k`` = ``v`` on every clip in the sequence."""
        for i in range(len(self.clipseq)):
            self.clipseq[i].__setattr__(k, v)

    def __len__(
        self,
    ) -> int:
        return len(self.clipseq)

    @property
    def duration(
        self,
    ) -> float:
        """Sum of Clip.duration over the sequence.

        Returns:
            float: total duration of the sequence.
        """
        if len(self.clipseq) == 0:
            return 0
        else:
            return sum([c.duration for c in self.clipseq])

    def __getitem__(self, i: Union[int, Iterable]) -> Union[Clip, ClipSeq]:
        """Index and slice access: an int yields a Clip; an iterable or a
        slice yields a new ClipSeq.

        Args:
            i (int, Iterable or slice): index

        Raises:
            ValueError: for unsupported index types

        Returns:
            Clip or ClipSeq:
        """
        # Coerce numpy integer scalars ("int64" etc.) to a plain int first.
        if "int" in str(type(i)):
            i = int(i)
        if isinstance(i, int):
            clip = self.clipseq[i]
            return clip
        elif isinstance(i, Iterable):
            clipseq = [self.__getitem__(x) for x in i]
            clipseq = ClipSeq(clipseq)
            return clipseq
        elif isinstance(i, slice):
            # NOTE(review): a slice with start/stop of None would break
            # range(); only fully-specified slices are supported here.
            if i.step is None:
                step = 1
            else:
                step = i.step
            clipseq = [self.__getitem__(x) for x in range(i.start, i.stop, step)]
            clipseq = ClipSeq(clipseq)
            return clipseq
        else:
            raise ValueError(
                "unsupported input, should be int or slice, but given {}, type={}".format(
                    i, type(i)
                )
            )

    @property
    def mvp_clip(self):
        """Load the actual clip data in moviepy format.

        Raises:
            NotImplementedError: must be provided by subclasses.
        """
        raise NotImplementedError

    @property
    def duration_seq_emb(
        self,
    ) -> np.array:
        # 1-D array of per-clip durations.
        emb = np.array([c.duration for c in self.clipseq])
        return emb

    @property
    def timestamp_seq_emb(self) -> np.array:
        # 1-D array of per-clip start times.
        emb = np.array([c.time_start for c in self.clipseq])
        return emb

    @property
    def rela_timestamp_seq_emb(self) -> np.array:
        # Cumulative durations normalized by total duration: the relative
        # end-time of each clip in [0, 1].
        duration_seq = [c.duration for c in self.clipseq]
        emb = np.cumsum(duration_seq) / self.duration
        return emb

    def get_emb(self, key: str, idx: Union[int, Iterable, None]) -> float:
        """Look up embedding values for ``key`` relative to this sub-sequence.

        Offsets ``idx`` by the first clip's clipid so callers index the
        sub-sequence from 0; ``idx=None`` selects the whole range.

        NOTE(review): relies on ``self.emb`` being attached externally — it
        is not set in __init__; confirm against the owning media map.
        """
        clip_start_idx = self.clipseq[0].clipid
        clip_end_idx = self.clipseq[-1].clipid
        # TODO: generalize this indexing scheme.
        if idx is None:
            idx = range(clip_start_idx, clip_end_idx + 1)
        elif isinstance(idx, int):
            idx += clip_start_idx
        elif isinstance(idx, Iterable):
            idx = [x + clip_start_idx for x in idx]
        else:
            raise ValueError(
                f"idx only support None, int, Iterable, but given {idx},type is {type(idx)}"
            )
        return self.emb.get_value(key, idx=idx)
MuseV/MMCM/mmcm/data/clip/clip_filter.py ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import Callable, List, Union
2
+
3
+ from .clip import ClipSeq
4
+
5
+ from .clip_process import reset_clipseq_id
6
+
7
+
8
class ClipFilter(object):
    """Clip filter: decides whether a Clip meets the given criteria by
    combining several predicate functions with a logical reducer
    (``all`` for AND, ``any`` for OR).
    """

    def __init__(self, funcs: Union[Callable, List[Callable]], logic_func: Callable=all) -> None:
        """
        Args:
            funcs (callable or list of callables): predicates over a clip.
            logic_func (callable, optional): ``all`` or ``any``. Defaults to all.
        """
        if isinstance(funcs, list):
            self.funcs = funcs
        else:
            self.funcs = [funcs]
        self.logic_func = logic_func

    def __call__(self, clip) -> bool:
        results = [predicate(clip) for predicate in self.funcs]
        return self.logic_func(results)
29
+
30
+
31
+
32
# TODO
class ClipSeqFilter(object):
    """Applies a clip-level filter to every clip of a ClipSeq, keeping only
    the clips that pass."""

    def __init__(self, filter: Callable) -> None:
        # ``filter`` shadows the builtin; name kept for interface stability.
        self.filter = filter

    def __call__(self, clipseq: ClipSeq) -> ClipSeq:
        # NOTE(review): ``reset_clipseq_id`` receives a plain list here and
        # its result is returned directly — confirm it actually yields a
        # ClipSeq as the return annotation claims.
        new_clipseq = []
        n_clipseq = len(clipseq)
        for i in range(n_clipseq):
            clip = clipseq[i]
            if self.filter(clip):
                new_clipseq.append(clip)
        new_clipseq = reset_clipseq_id(new_clipseq)
        # logger.debug("ClipSeqFilter: clipseq length before={}, after={}".format(n_clipseq, len(new_clipseq)))
        return new_clipseq
MuseV/MMCM/mmcm/data/clip/clip_fusion.py ADDED
@@ -0,0 +1,64 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import List, Union, Callable
2
+
3
+ from copy import deepcopy
4
+
5
+ from .clip import ClipSeq
6
+ from .clip_process import reset_clipseq_id
7
+ import logging
8
+
9
+ logger = logging.getLogger(__name__) # pylint: disable=invalid-name
10
+
11
+
12
# TODO: different clip types need different fusion strategies
def fuse_clips(s1: ClipSeq, s2: ClipSeq) -> ClipSeq:
    """Fuse clip(s) ``s2`` into ``s1``.

    NOTE(review): despite the ClipSeq annotations, the body accesses Clip
    attributes (duration, stage, timepoint_type) — presumably takes a Clip
    and a Clip or list of Clips; confirm against callers.

    Args:
        s1 (Clip): fusion target; deep-copied, so the input is not mutated.
        s2 (Clip or [Clip]): clip(s) folded into s1.

    Returns:
        Clip: the fused clip.
    """
    if not isinstance(s2, list):
        s2 = [s2]
    s1 = deepcopy(s1)
    for other_clip in s2:
        # Durations accumulate; string-valued fields are joined with "_".
        s1.duration += other_clip.duration
        if s1.stage is not None and other_clip.stage is not None:
            # TODO: how to keep the information of the fused clips
            s1.stage = "{}_{}".format(s1.stage, other_clip.stage)
        # NOTE(review): the legacy Clip defines ``original_clipid``, not
        # ``origin_clipid`` — this may raise AttributeError; verify.
        s1.origin_clipid.extend(other_clip.origin_clipid)
        if s1.timepoint_type is not None and other_clip.timepoint_type is not None:
            s1.timepoint_type = "{}_{}".format(
                s1.timepoint_type, other_clip.timepoint_type
            )
    return s1
37
+
38
+
39
# TODO: different filter and fusion functions don't fit one shared flow; optimize later
class ClipSeqFusion(object):
    """Filters a ClipSeq; fusion is not implemented yet.

    NOTE(review): ``fuse_func`` is stored but never used, and the body —
    including the log message — is identical to ClipSeqFilter; presumably a
    work in progress.
    """

    def __init__(self, filter: Callable, fuse_func: Callable = None) -> None:
        self.filter = filter
        self.fuse_func = fuse_func

    def __call__(self, clipseq: ClipSeq) -> ClipSeq:
        new_clipseq = []
        n_clipseq = len(clipseq)
        for i in range(n_clipseq):
            clip = clipseq[i]
            if self.filter(clip):
                new_clipseq.append(clip)
        new_clipseq = reset_clipseq_id(new_clipseq)
        logger.debug(
            "ClipSeqFilter: clipseq length before={}, after={}".format(
                n_clipseq, len(new_clipseq)
            )
        )
        return new_clipseq
MuseV/MMCM/mmcm/data/clip/clip_process.py ADDED
@@ -0,0 +1,366 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from functools import partial
2
+ from copy import deepcopy
3
+ from typing import Iterable, List, Tuple, Union
4
+ import bisect
5
+ import logging
6
+
7
+ import numpy as np
8
+
9
+
10
+ from .clip import Clip, ClipSeq
11
+ from .clipid import ClipIds, ClipIdsSeq, MatchedClipIds, MatchedClipIdsSeq
12
+
13
+ logger = logging.getLogger(__name__) # pylint: disable=invalid-name
14
+
15
+ __all__ = [
16
+ "find_idx_by_rela_time",
17
+ "find_idx_by_time",
18
+ "find_idx_by_clip",
19
+ "get_subseq_by_time",
20
+ "get_subseq_by_idx",
21
+ "clip_is_top",
22
+ "clip_is_middle",
23
+ "clip_is_end",
24
+ "abadon_old_return_new",
25
+ "reset_clipseq_id",
26
+ "insert_endclip",
27
+ "insert_startclip",
28
+ "drop_start_end_by_time",
29
+ "complete_clipseq",
30
+ "complete_gap",
31
+ "get_subseq_by_stages",
32
+ "find_time_by_stage",
33
+ ]
34
+
35
+
36
def find_idx_by_rela_time(clipseq: ClipSeq, timepoint: float) -> int:
    """Map a relative position (fraction of total duration) to a clip index.

    Builds the cumulative clip start times and bisects the absolute time
    into them; the result is clamped to a valid index.
    """
    absolute_time = clipseq.duration * timepoint
    starts = np.cumsum([0] + [clip.duration for clip in clipseq])
    pos = bisect.bisect_right(starts, absolute_time) - 1
    return min(max(pos, 0), len(clipseq) - 1)
45
+
46
+
47
def find_idx_by_time(clipseq: ClipSeq, timepoint: float) -> int:
    """Return the index of the clip whose span contains ``timepoint``.

    Bisects the clip start times; the result is clamped into
    ``[0, len(clipseq) - 1]`` so out-of-range timepoints map to the first
    or last clip.

    Args:
        clipseq (ClipSeq): sequence to search.
        timepoint (float): absolute time position.

    Returns:
        index of the containing clip.
    """
    starts = [clip.time_start for clip in clipseq]
    pos = bisect.bisect_right(starts, timepoint) - 1
    return min(max(pos, 0), len(clipseq) - 1)
61
+
62
+
63
def find_idx_by_clip(clipseq: ClipSeq, clip: Clip, eps: float = 1e-4) -> int:
    """Locate ``clip`` in ``clipseq`` by maximum temporal overlap.

    For every candidate the intersection with ``clip`` is computed and
    normalized by ``clip``'s duration; the candidate with the largest ratio
    wins, provided the ratio exceeds ``eps``.

    Args:
        clipseq (ClipSeq): candidate clips.
        clip (Clip): target clip.
        eps (float, optional): minimum overlap ratio. Defaults to 1e-4.

    Returns:
        int: index of the best-overlapping candidate, or None when nothing
        overlaps enough.
    """
    spans = np.array(
        [[c.time_start, c.time_start + c.duration] for c in clipseq]
    )
    target_start = clip.time_start
    target_end = target_start + clip.duration
    overlap = np.minimum(spans[:, 1], target_end) - np.maximum(
        spans[:, 0], target_start
    )
    ratio = overlap / clip.duration
    if np.max(ratio) <= eps:
        return None
    return np.argmax(ratio)
85
+
86
+
87
def get_subseq_by_time(
    clipseq: ClipSeq,
    start: float = 0,
    duration: float = None,
    end: float = 1,
    eps: float = 1e-2,
) -> ClipSeq:
    """Trim a clip sequence, keeping the clips between ``start`` and ``end``.

    ``start``/``end`` below 1 are treated as relative positions and are
    multiplied by ``duration``; values >= 1 are absolute times in seconds.

    Args:
        clipseq (ClipSeq): sequence to trim.
        start (float): beginning of the kept range. Defaults to 0 (no head trim).
        duration (float, optional): total duration; read from
            ``clipseq.duration`` when None.
        end (float, optional): end of the kept range. Defaults to 1 (no tail trim).
        eps (float, optional): tolerance when deciding that ``end`` equals
            the full duration.

    Returns:
        ClipSeq: the trimmed sequence.
    """
    if (start == 0 or start is None) and (end is None or end == 1):
        logger.warning("you should set start or end")
        return clipseq
    if duration is None:
        duration = clipseq.duration
    if start is None or start == 0:
        clip_start_idx = 0
    else:
        if start < 1:
            start = start * duration
        clip_start_idx = find_idx_by_time(clipseq, start)
    if end is None or end == 1 or np.abs(duration - end) < eps:
        # fixed: an end index of -1 made the final slice clipseq[start:-1],
        # silently dropping the last clip; None keeps the sequence to the end
        clip_end_idx = None
    else:
        if end < 1:
            end = end * duration
        clip_end_idx = find_idx_by_time(clipseq, end)
    if clip_end_idx is not None and clip_start_idx >= clip_end_idx:
        logger.error(
            f"clip_end_idx({clip_end_idx}) should be > clip_start_idx({clip_start_idx})"
        )
    subseq = get_subseq_by_idx(clipseq, clip_start_idx, clip_end_idx)
    return subseq
130
+
131
+
132
def get_subseq_by_idx(clipseq: ClipSeq, start: int = None, end: int = None) -> ClipSeq:
    """Slice a clip sequence by index range.

    Args:
        clipseq (ClipSeq): sequence to slice.
        start (int, optional): first index to keep; None means 0.
        end (int, optional): one-past-last index; None means the end.

    Returns:
        the untouched sequence when both bounds are None, otherwise the slice.
    """
    if start is None and end is None:
        return clipseq
    lo = 0 if start is None else start
    hi = len(clipseq) if end is None else end
    return clipseq[lo:hi]
150
+
151
+
152
def clip_is_top(clip: Clip, total: float, th: float = 0.1) -> bool:
    """Tell whether ``clip`` starts within the head portion of the sequence.

    Args:
        clip (Clip): clip under test.
        total (float): total duration of the owning ClipSeq.
        th (float, optional): fraction of ``total`` regarded as the head.
            Defaults to 0.1.

    Returns:
        bool: True when the clip's start time falls inside the head portion.
    """
    return clip.time_start / total <= th
168
+
169
+
170
def clip_is_end(clip: Clip, total: float, th: float = 0.9) -> bool:
    """Tell whether ``clip`` ends within the tail portion of the sequence.

    Args:
        clip (Clip): clip under test.
        total (float): total duration of the owning ClipSeq.
        th (float, optional): fraction of ``total`` where the tail begins.
            Defaults to 0.9.

    Returns:
        bool: True when the clip's end time falls inside the tail portion.
    """
    end_time = clip.time_start + clip.duration
    return end_time / total >= th
186
+
187
+
188
def clip_is_middle(
    clip: Clip, total: float, start: float = 0.05, end: float = 0.9
) -> bool:
    """Tell whether ``clip`` lies completely inside the middle portion.

    ``start``/``end`` below 1 are fractions of ``total``; larger values are
    absolute times.

    Args:
        clip (Clip): clip under test.
        total (float): total duration of the owning ClipSeq.
        start (float, optional): beginning of the middle portion. Defaults to 0.05.
        end (float, optional): end of the middle portion. Defaults to 0.9.

    Returns:
        bool: True when the whole clip fits between ``start`` and ``end``.
    """
    lo = total * start if 0 <= start < 1 else start
    hi = total * end if 0 < end <= 1 else end
    clip_begin = clip.time_start
    clip_finish = clip.time_start + clip.duration
    return clip_begin >= lo and clip_finish <= hi
212
+
213
+
214
def abadon_old_return_new(s1: Clip, s2: Clip) -> Clip:
    """Degenerate fusion strategy: drop the earlier clip, keep the later one.

    Args:
        s1 (Clip): earlier clip (discarded).
        s2 (Clip): later clip.

    Returns:
        Clip: ``s2`` unchanged.
    """
    return s2
225
+
226
+
227
+ # TODO:待确认是否要更新clipid,不方便对比着json进行debug
228
def reset_clipseq_id(clipseq: ClipSeq) -> ClipSeq:
    """Renumber every clip in place so ``clipid`` equals its position.

    Handles both dict-style clips and Clip objects; returns the same
    sequence for call-chaining.
    """
    for idx, clip in enumerate(clipseq):
        if isinstance(clip, dict):
            clip["clipid"] = idx
        else:
            clip.clipid = idx
    return clipseq
235
+
236
+
237
def insert_startclip(clipseq: ClipSeq) -> ClipSeq:
    """Prepend a leading clip so the sequence starts at t=0.

    Only acts when the first clip starts after 0: a gap-filling clip of the
    sequence's own ``ClipClass`` is inserted covering ``[0, first.time_start)``
    with ``timepoint_type=0``, then all clip ids are renumbered.

    Args:
        clipseq (ClipSeq): sequence to complete.

    Returns:
        ClipSeq: the sequence, now starting at time 0.
    """
    if clipseq[0].time_start > 0:
        start = clipseq.ClipClass(
            time_start=0, duration=round(clipseq[0].time_start, 3), timepoint_type=0
        )
        clipseq.insert(0, start)
        clipseq = reset_clipseq_id(clipseq)
    return clipseq
254
+
255
+
256
def insert_endclip(clipseq: ClipSeq, duration: float) -> ClipSeq:
    """Append a trailing clip so the sequence covers the full duration.

    Only acts when more than 1 second is missing at the tail: a gap-filling
    clip of the sequence's own ``ClipClass`` with ``timepoint_type=0`` is
    appended and clip ids are renumbered.

    Args:
        clipseq (ClipSeq): sequence to complete.
        duration (float): total duration of the media.

    Returns:
        ClipSeq: the sequence, extended to ``duration`` when needed.
    """
    clipseq_endtime = clipseq[-1].time_start + clipseq[-1].duration
    # NOTE(review): the 1-second threshold is hard-coded; tail gaps shorter
    # than 1s stay uncovered — confirm this is intended
    if duration - clipseq_endtime > 1:
        end = clipseq.ClipClass(
            time_start=round(clipseq_endtime, 3),
            duration=round(duration - clipseq_endtime, 3),
            timepoint_type=0,
        )
        clipseq.append(end)
        clipseq = reset_clipseq_id(clipseq)
    return clipseq
277
+
278
+
279
def drop_start_end_by_time(
    clipseq: ClipSeq, start: float, end: float, duration: float = None
):
    """Alias of :func:`get_subseq_by_time`: trim head and tail by time."""
    return get_subseq_by_time(clipseq=clipseq, start=start, end=end, duration=duration)
283
+
284
+
285
def complete_clipseq(
    clipseq: ClipSeq, duration: float = None, gap_th: float = 2
) -> ClipSeq:
    """Make the timeline of a clip sequence continuous and complete.

    Lyric-derived music maps, for example, may miss the head, the tail and
    inter-line gaps; this fills all of them with blank clips.

    Args:
        clipseq (ClipSeq): sequence to complete; a plain list is first
            wrapped into a ClipSeq.
        duration (float, optional): total duration; the tail is only
            completed when given. Defaults to None.
        gap_th (float, optional): gaps shorter than this are merged into the
            preceding clip instead of becoming blank clips. Defaults to 2.

    Returns:
        ClipSeq: sequence with a continuous, complete timeline.
    """
    if isinstance(clipseq, list):
        # recurse once with a proper ClipSeq wrapper
        clipseq = ClipSeq(clipseq)
        return complete_clipseq(clipseq=clipseq, duration=duration, gap_th=gap_th)
    clipseq = complete_gap(clipseq, th=gap_th)
    clipseq = insert_startclip(clipseq)
    if duration is not None:
        clipseq = insert_endclip(clipseq, duration)
    return clipseq
307
+
308
+
309
def complete_gap(clipseq: ClipSeq, th: float = 2) -> ClipSeq:
    """Fill the gaps between consecutive clips with blank clips.

    A gap of at least ``th`` seconds becomes a new blank clip
    (``timepoint_type=0``); a shorter gap is absorbed by extending the
    preceding clip. The sequence is then re-sorted by start time and clip
    ids are renumbered.

    Args:
        clipseq (ClipSeq): original sequence (e.g. built from lyrics).
        th (float, optional): minimum gap length that becomes its own blank
            clip. Defaults to 2.

    Returns:
        ClipSeq: the completed sequence (modified in place and returned).
    """
    gap_clipseq = []
    clipid = 0
    for i in range(len(clipseq) - 1):
        time_start = clipseq[i].time_start
        duration = clipseq[i].duration
        time_end = time_start + duration
        next_time_start = clipseq[i + 1].time_start
        time_diff = next_time_start - time_end
        if time_diff >= th:
            # temporary clipid; reset_clipseq_id below assigns the final ids
            blank_clip = clipseq.ClipClass(
                time_start=time_end,
                duration=time_diff,
                timepoint_type=0,
                clipid=clipid,
            )
            gap_clipseq.append(blank_clip)
            clipid += 1
        else:
            # absorb the short gap by stretching the current clip
            clipseq[i].duration = next_time_start - time_start
    clipseq.extend(gap_clipseq)
    clipseq.clips = sorted(clipseq.clips, key=lambda clip: clip.time_start)
    reset_clipseq_id(clipseq)
    return clipseq
343
+
344
+
345
def find_time_by_stage(
    clipseq: ClipSeq, stages: Union[str, List[str]] = None
) -> Tuple[float, float]:
    """Return the time span of the first clip whose stage is in ``stages``.

    Args:
        clipseq: clips to search.
        stages: one stage name or a list of names.

    Returns:
        ``(time_start, time_end)`` of the first matching clip, or
        ``(None, None)`` when no clip matches.
    """
    # fixed: a single stage string must be wrapped into a list; the original
    # wrapped lists instead, so list input always missed and str input did
    # substring matching
    if isinstance(stages, str):
        stages = [stages]
    for clip in clipseq:
        if clip.stage in stages:
            return clip.time_start, clip.time_end
    return None, None
354
+
355
+
356
def get_subseq_by_stages(clipseq: ClipSeq, stages: Union[str, List[str]]) -> ClipSeq:
    """Cut the subsequence spanning from the first to the last given stage.

    Args:
        clipseq: sequence to cut.
        stages: stage name(s); the span runs from the start of the first
            listed stage to the end of the last one. Missing stages fall
            back to the sequence boundaries.

    Returns:
        ClipSeq: the clips covering the requested stages.
    """
    # fixed: the original wrapped lists instead of strings and referenced
    # undefined names start1/end2, raising NameError on every call
    if isinstance(stages, str):
        stages = [stages]
    start, _ = find_time_by_stage(clipseq, stages[0])
    _, end = find_time_by_stage(clipseq, stages[-1])
    if start is None:
        start = 0
    if end is None:
        end = clipseq.duration
    return get_subseq_by_time(clipseq=clipseq, start=start, end=end)
MuseV/MMCM/mmcm/data/clip/clip_stat.py ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import Tuple
2
+
3
+ import numpy as np
4
+
5
+ from .clip import ClipSeq
6
+
7
+
8
def stat_clipseq_duration(
    clipseq: ClipSeq,
) -> Tuple[np.array, np.array]:
    """Histogram the per-clip durations of a sequence.

    Returns:
        (hist, bin_edges) as produced by ``np.histogram`` over the clip
        durations.
    """
    durations = [c.duration for c in clipseq]
    hist, bin_edges = np.histogram(durations)
    return hist, bin_edges
MuseV/MMCM/mmcm/data/clip/clipid.py ADDED
@@ -0,0 +1,70 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ from typing import Union, List
4
+
5
+ __all__ = [
6
+ "ClipIds",
7
+ "ClipIdsSeq",
8
+ "MatchedClipIds",
9
+ "MatchedClipIdsSeq",
10
+ ]
11
+
12
+
13
class ClipIds(object):
    """Indices of clips inside a ClipSeq.

    Mainly used for clips produced by fusing several source clips, e.g. one
    MusicClip matched against several VideoClips.
    """

    def __init__(
        self,
        clipids: Union[int, List[int]],
    ) -> None:
        """
        Args:
            clipids (list or int): position(s) in the ClipSeq; a single int
                is wrapped into a one-element list.
        """
        if isinstance(clipids, list):
            self.clipids = clipids
        else:
            self.clipids = [clipids]
25
+
26
+
27
class ClipIdsSeq(object):
    """A list of ClipIds, e.g. a ClipSeq regrouped into coarser units."""

    def __init__(self, clipids_seq: List[ClipIds]) -> None:
        """
        Args:
            clipids_seq (list): list of ClipIds; a single ClipIds is
                wrapped into a one-element list.
        """
        # fixed: the original tested isinstance(..., ClipIds), which wrapped
        # the documented list input into a nested list and left a bare
        # ClipIds unwrapped
        self.clipids_seq = (
            clipids_seq if isinstance(clipids_seq, list) else [clipids_seq]
        )
38
+
39
+
40
+ # TODO: metric后续可能是字典
41
class MatchedClipIds(object):
    """A matched pair of clip-id groups from two modalities.

    Typical use: the correspondence between a music clip and video clips.
    """

    def __init__(
        self, id1: ClipIds, id2: ClipIds, metric: float = None, **kwargs
    ) -> None:
        """
        Args:
            id1 (ClipIds): clip ids of the first modality; coerced to ClipIds.
            id2 (ClipIds): clip ids of the second modality; coerced to ClipIds.
            metric (float): matching distance/score.
            **kwargs: extra attributes stored directly on the instance.
        """
        self.id1 = id1 if isinstance(id1, ClipIds) else ClipIds(id1)
        self.id2 = id2 if isinstance(id2, ClipIds) else ClipIds(id2)
        self.metric = metric
        self.__dict__.update(**kwargs)
57
+
58
+
59
class MatchedClipIdsSeq(object):
    """A sequence of matched clip-id pairs between two modalities.

    Typical use: the full alignment between a music clip sequence and a
    video clip sequence, where every element is a MatchedClipIds.
    """

    def __init__(self, seq: List[MatchedClipIds], metric: float = None, **kwargs) -> None:
        """
        Args:
            seq (list): list of matched pairs.
            metric (float): overall matching distance/score.
            **kwargs: extra attributes stored directly on the instance.
        """
        self.seq = seq
        self.metric = metric
        for name, value in kwargs.items():
            setattr(self, name, value)
MuseV/MMCM/mmcm/data/crawl/__init__.py ADDED
File without changes
MuseV/MMCM/mmcm/data/crawl/download.py ADDED
@@ -0,0 +1,72 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ from collections import namedtuple
3
+ from typing import NamedTuple, Tuple, List
4
+ import logging
5
+ import os
6
+ import numpy as np
7
+ import subprocess
8
+
9
+ import requests
10
+
11
+ import wget
12
+
13
+ from .youtube import download_youtube
14
+ from .flicker import download_flickr
15
+ from .ffmpeg import ffmpeg_load
16
+
17
+ logger = logging.getLogger(__name__)
18
+
19
+ # DownloadStatus = namedtuple("DownloadStatus", ["status_code", "msg"])
20
+
21
# Status codes shared by the download helpers below: 0 is success, negative
# values are failures or skips.
status_code = {0: "download: succ",
               -1: "download: failed",
               -2: "clip: failed",
               -3: "directory not exists",
               -4: "skip task",
               - 404: "param error"}
27
+
28
+
29
def download_with_request(url, path):
    """Download ``url`` to ``path`` using requests.

    Args:
        url (str): source url.
        path (str): destination file path.

    Returns:
        str: ``path`` on success.

    Raises:
        requests.HTTPError: on a non-2xx response, so callers (e.g.
            ``download_video``) report the failure instead of treating a
            never-written file as success — the original printed a message
            and still returned ``path``.
    """
    res = requests.get(url)
    res.raise_for_status()
    with open(path, "wb") as f:
        f.write(res.content)
    return path
37
+
38
def download_video(url, save_path:str=None, save_dir:str=None, basename:str=None, filename:str=None, format:str=None, data_type: str="wget", **kwargs) -> Tuple[int, str]:
    """Download a video with the backend selected by ``data_type``.

    Missing path pieces are derived from the other arguments: ``save_path``
    from ``save_dir``+``basename`` and vice versa.

    Args:
        url: source url.
        save_path (str, optional): full target path.
        save_dir (str, optional): target directory.
        basename (str, optional): file name with extension.
        filename (str, optional): file name without extension.
        format (str, optional): extension used when ``basename`` must be
            synthesized.
        data_type (str): one of ``requests``, ``wget``, ``youtube``,
            ``flickr``, ``ffmpeg``.

    Returns:
        Tuple[int, str]: (status code as in ``status_code``, saved path);
        (-4, path) when the target already exists, (-1, None) on any failure.
    """
    # derive the missing path components from whichever ones were given
    if save_path is None:
        if basename is None:
            basename = f"(unknown).{format}"
        save_path = os.path.join(save_dir, basename)
    if save_dir is None:
        save_dir = os.path.dirname(save_path)
    if basename is None:
        basename = os.path.basename(save_path)
    if filename is None:
        filename, format = os.path.splitext(basename)
    os.makedirs(save_dir, exist_ok=True)

    # skip tasks whose target already exists
    if os.path.exists(save_path):
        return (-4, save_path)

    try:
        if data_type == "requests":
            save_path = download_with_request(url=url, path=save_path)
        elif data_type == "wget":
            save_path = wget.download(url=url, out=save_path)
        elif data_type == "youtube":
            save_path = download_youtube(url, format=format, save_dir=save_dir, filename=basename)
        elif data_type == "flickr":
            save_path = download_flickr(url, save_path)
        elif data_type == "ffmpeg":
            # NOTE(review): only the return code is kept here; save_path is
            # assumed to be written by ffmpeg_load itself — confirm
            code = ffmpeg_load(url=url, save_path=save_path)
        else:
            raise ValueError(f"data_type shoulbe one of [wget, youtube, flickr, ffmpeg], but given {data_type}")
    except Exception as e:
        logger.error("failed download file {} to {} failed!".format(url, save_path))
        logger.exception(e)
        return (-1, None)

    return (0, save_path)
MuseV/MMCM/mmcm/data/crawl/error.py ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+
3
class SubprocessError(Exception):
    """
    Raised when a command executed through ``subprocess`` exits with a
    non-zero return code; carries the command and its captured output.
    """

    def __init__(self, cmd, return_code, stdout, stderr, *args):
        # prefer stderr for the message, fall back to stdout when it is blank
        details = stderr if stderr.strip() else stdout
        msg = 'Got non-zero exit code ({1}) from command "{0}": {2}'.format(
            cmd[0], return_code, details
        )
        self.cmd = cmd
        self.cmd_return_code = return_code
        self.cmd_stdout = stdout
        self.cmd_stderr = stderr
        super(SubprocessError, self).__init__(msg, *args)
MuseV/MMCM/mmcm/data/crawl/ffmpeg.py ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import subprocess
2
+
3
+ from .error import SubprocessError
4
+
5
+
6
class FfmpegInvalidURLError(Exception):
    """
    Raised when a request made for ffmpeg fails with a 4XX or 5XX error.
    """

    def __init__(self, url, error, *args):
        self.url = url
        self.error = error
        message = 'Got error when making request to "{}": {}'.format(url, error)
        super(FfmpegInvalidURLError, self).__init__(message, *args)
16
+
17
+
18
def ffmpeg_load(url: str, save_path: str) -> int:
    """Fetch ``url`` with ffmpeg and write the first 10 seconds to ``save_path``.

    The clip is re-encoded to 30fps h264 mp4; ``-n`` makes ffmpeg refuse to
    overwrite an existing output file.

    Returns:
        int: ffmpeg's return code (0 on success); the original ``-> str``
        annotation was wrong.

    Raises:
        SubprocessError: when ffmpeg exits with a non-zero code.
    """

    def run(cmd):
        # capture both streams so they can be attached to the error
        proc = subprocess.Popen(
            cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        stdout, stderr = proc.communicate()
        return_code = proc.returncode

        if return_code != 0:
            raise SubprocessError(
                cmd, return_code, stdout.decode(), stderr.decode())
        return return_code

    command = ['ffmpeg', '-n', '-i', url, '-t', '10', '-f', 'mp4',
               '-r', '30', '-vcodec', 'h264', save_path, '-loglevel', 'error']
    code = run(command)
    return code
35
+
36
+
37
+
38
+
39
+
MuseV/MMCM/mmcm/data/crawl/flicker.py ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+
3
+ from .ffmpeg import ffmpeg_load
4
+
5
+
6
def extract_flickr_id(url):
    """Pull the flickr video id out of a flickr url.

    Assumes the id is the 4th-from-last path segment after trailing slashes
    are removed.
    """
    segments = url.strip('/').split('/')
    return segments[-4]
8
+
9
+
10
def download_flickr(url: str, save_path: str) -> str:
    """Download a flickr video, retrying via the direct download endpoint.

    The first attempt feeds the given url straight to ffmpeg; when that
    fails (``ffmpeg_load`` raises on non-zero exit, so the original retry
    branch was unreachable), the flickr id is extracted and the official
    ``video_download.gne`` endpoint is tried instead.

    Args:
        url: flickr page/video url.
        save_path: destination path.

    Returns:
        str: ``save_path``. (The original returned a ``(code, path)`` tuple
        on first-try success, breaking callers that expect a path.)
    """
    try:
        code = ffmpeg_load(url=url, save_path=save_path)
        if code == 0:
            return save_path
    except Exception:
        # ffmpeg_load raises SubprocessError on failure; swallow it so the
        # retry below gets its chance
        pass
    # only retry when the first attempt failed
    flickr_id = extract_flickr_id(url)
    retry_url = 'https://www.flickr.com/video_download.gne?id={}'.format(flickr_id)
    ffmpeg_load(url=retry_url, save_path=save_path)
    return save_path
MuseV/MMCM/mmcm/data/crawl/youtube.py ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ import os
3
+
4
+ from pytube import YouTube
5
+
6
+
7
def download_youtube(url, format, save_dir, filename):
    """Download the highest-resolution progressive stream of a youtube video.

    Args:
        url (str): youtube video url.
        format (str): file extension used to filter streams (e.g. "mp4").
        save_dir (str): output directory.
        filename (str): output file name.

    Returns:
        str: path of the downloaded file as reported by pytube.
    """
    youtube = YouTube(url)
    # progressive streams bundle audio and video in a single file
    streams = youtube.streams.filter(progressive=True,
                                     file_extension=format)
    save_path = streams.get_highest_resolution().download(output_path=save_dir,
                                                          filename=filename)
    return save_path
MuseV/MMCM/mmcm/data/emb/__init__.py ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ from .emb import *
2
+ from .h5py_emb import H5pyMediaMapEmb, H5pyMediaMapEmbProxy
MuseV/MMCM/mmcm/data/emb/emb.py ADDED
@@ -0,0 +1,104 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """用于将 mediamap中的emb存储独立出去,仍处于开发中
2
+ """
3
+ import logging
4
+
5
+ import numpy as np
6
+
7
+
8
+ logger = logging.getLogger(__name__) # pylint: disable=invalid-name
9
+
10
+ __all__ = ["MediaMapEmb"]
11
+
12
+
13
class MediaMapEmb(object):
    """Key/value interface for media-map embeddings stored offline.

    Keys follow the layout ``"{level}_{factor}_{algo}"`` (level omitted for
    whole-media embeddings), e.g.::

        "overall_algo"                              # whole-file embedding
        "theme" / "emotion_algo" / "semantic_algo"  # per-dimension embeddings
        "clips_overall_algo"                        # n_clips x clip_emb
        "scenes_semantic_algo"                      # n_scenes x scene_emb
        "frames_theme_algo"                         # n_frames x frame_emb
        "frames_objs/{frame_id}/{factor}_{algo}"    # n_objs x obj_emb
        "roles_{algo}"                              # n x obj_emb

    Subclasses implement ``get_value``/``set_value`` against a concrete
    backend (e.g. hdf5).

    Args:
        path (str): backend storage path (e.g. an hdf5 file).
    """

    def __init__(self, path: str) -> None:
        self.path = path

    def get_value(self, key, idx=None):
        """Read ``key`` (optionally row ``idx``); backend-specific."""
        raise NotImplementedError

    def __getitem__(self, key):
        return self.get_value(key)

    def get_media(self, factor, algo):
        return self.get_value(f"{factor}_{algo}")

    def get_clips(self, factor, algo, idx=None):
        return self.get_value(f"clips_{factor}_{algo}", idx=idx)

    def get_frames(self, factor, algo, idx=None):
        return self.get_value(f"frames_{factor}_{algo}", idx=idx)

    def get_frame_objs(self, frame_idx, factor, algo, idx=None):
        return self.get_value(["frames_objs", frame_idx, f"{factor}_{algo}"], idx=idx)

    def set_value(self, key, value, idx=None):
        """Write ``value`` under ``key`` (optionally at row ``idx``); backend-specific."""
        raise NotImplementedError

    def set_media(self, factor, value, algo):
        self.set_value([f"{factor}_{algo}"], value)

    def set_clips(self, factor, value, algo, idx=None):
        self.set_value([f"clips_{factor}_{algo}"], value, idx=idx)

    def set_frames(self, factor, value, algo, idx=None):
        # fixed: idx was silently dropped, so row-wise frame updates
        # overwrote the whole dataset
        self.set_value([f"frames_{factor}_{algo}"], value, idx=idx)

    def set_frame_objs(self, frame_idx, factor, value, algo, idx=None):
        return self.set_value(
            ["frames_objs", frame_idx, f"{factor}_{algo}"], value, idx=idx
        )

    def set_roles(self, algo, value, idx=None):
        return self.set_value(f"roles_{algo}", value, idx=idx)

    def get_roles(self, algo, idx=None):
        return self.get_value(f"roles_{algo}", idx=idx)

    def __setitem__(self, key, value):
        # fixed: was ``self.set_value(self, key, value)``, which passed the
        # instance itself as the key
        self.set_value(key, value)
101
+
102
+
103
class MediaMapEmbProxy(MediaMapEmb):
    # placeholder for a proxy (e.g. remote/cached) embedding store
    pass
MuseV/MMCM/mmcm/data/emb/h5py_emb.py ADDED
@@ -0,0 +1,119 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import Union, List
2
+ import logging
3
+
4
+ import h5py
5
+ import numpy as np
6
+
7
+ from .emb import MediaMapEmb
8
+
9
+ logger = logging.getLogger(__name__) # pylint: disable=invalid-name
10
+
11
+ __all__ = ["H5pyMediaMapEmb", "save_value_with_h5py"]
12
+
13
+
14
def save_value_with_h5py(
    path: str,
    value: Union[np.ndarray, None],
    key: str,
    idx: Union[int, List[int]] = None,
    dtype=None,
    shape=None,
    overwrite: bool = False,
):
    """Write ``value`` into dataset ``key`` of the hdf5 file at ``path``.

    The dataset is (re)created when missing, when ``overwrite`` is set, or
    when a non-string dataset's shape differs from ``value``'s. With ``idx``
    given, only that row/slice is assigned; otherwise the whole dataset is
    overwritten.

    Args:
        path: hdf5 file path (opened in append mode).
        value: data to store; provide ``dtype``/``shape`` explicitly when None.
        key: dataset name.
        idx: optional row index/indices to assign.
        dtype: dataset dtype; defaults to ``value.dtype``.
        shape: dataset shape; defaults to ``value.shape``.
        overwrite: force re-creating an existing dataset.
    """
    with h5py.File(path, "a") as f:
        if dtype is None:
            dtype = value.dtype
        if shape is None:
            shape = value.shape
        del_key = False
        if key in f:
            if overwrite:
                del_key = True
            # NOTE(review): variable-length string datasets are never
            # re-created on shape mismatch — confirm this is intended
            if f[key].dtype != h5py.special_dtype(vlen=str):
                if f[key].shape != value.shape:
                    del_key = True
            if del_key:
                del f[key]
        if key not in f:
            f.create_dataset(key, shape=shape, dtype=dtype)
        if idx is None:
            f[key][...] = value
        else:
            f[key][idx] = value
43
+
44
+
45
class H5pyMediaMapEmb(MediaMapEmb):
    """hdf5-backed media-map embedding store.

    Keys follow the ``MediaMapEmb`` layout (``"clips_{factor}_{algo}"``,
    ``"frames_objs/{frame_id}/{factor}_{algo}"``, ``"roles_{algo}"`` ...);
    list keys are joined with ``/`` into hdf5 group paths.
    """

    def __init__(self, path: str) -> None:
        """
        Args:
            path (str): hdf5 file path; the file is opened in append mode
                and stays open for the lifetime of the object (see close()).
        """
        super().__init__(path)
        # TODO: support reading/writing through a ``with open`` style API
        self.f = h5py.File(path, "a")

    def _keys_index(self, key):
        # join a single key or a list of key parts into one hdf5 path,
        # skipping None parts
        if not isinstance(key, list):
            key = [key]
        key = "/".join([str(x) for x in key if x is not None])
        return key

    def get_value(self, key, idx=None):
        """Read the whole dataset (idx None) or one row as ``np.ndarray``."""
        new_key = self._keys_index(key)
        if idx is None:
            data = np.array(self.f[new_key])
        else:
            data = np.array(self.f[new_key][idx])
        return data

    def set_value(self, key, value, idx=None):
        """Create the dataset on first write, then assign all of it or row ``idx``."""
        new_key = self._keys_index(key)
        if new_key not in self.f:
            self.f.create_dataset(new_key, shape=value.shape, dtype=value.dtype)
        if idx is None:
            self.f[new_key][...] = value
        else:
            self.f[new_key][idx] = value

    def close(self):
        # release the underlying hdf5 file handle
        self.f.close()
116
+
117
+
118
class H5pyMediaMapEmbProxy(H5pyMediaMapEmb):
    # placeholder for a proxy (e.g. remote/cached) hdf5 store
    pass
MuseV/MMCM/mmcm/data/emb/json_emb.py ADDED
File without changes
MuseV/MMCM/mmcm/data/emb/numpy_emb.py ADDED
File without changes
MuseV/MMCM/mmcm/data/extract_feature/__init__.py ADDED
File without changes
MuseV/MMCM/mmcm/data/extract_feature/base_extract_feature.py ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import List, Union, Any
2
+
3
+ import torch
4
+ from torch import nn
5
+ import numpy as np
6
+ import h5py
7
+
8
+
9
class BaseFeatureExtractor(nn.Module):
    """Base class for media feature extractors.

    Subclasses implement ``extract``; both ``__call__`` and ``forward``
    delegate to it.

    Args:
        device (str): torch device for the extractor. Defaults to "cpu".
        dtype: torch dtype of the produced features. Defaults to torch.float32.
        name (str): identifier used when persisting features.
    """

    def __init__(self, device: str = "cpu", dtype=torch.float32, name: str = None):
        super().__init__()
        self.device = device
        self.dtype = dtype
        self.name = name

    def extract(
        self, data: Any, return_type: str = "numpy"
    ) -> Union[np.ndarray, torch.Tensor]:
        """Extract features from ``data``; must be overridden.

        Raises:
            NotImplementedError: always, in the base class. (The original
            raised ``NotADirectoryError`` — clearly a typo.)
        """
        raise NotImplementedError

    def __call__(self, *args: Any, **kwds: Any) -> Any:
        return self.extract(*args, **kwds)

    def save_with_h5py(self, f: Union[h5py.File, str], *args, **kwds):
        """Persist extracted features into an hdf5 file or path; override me."""
        raise NotImplementedError

    def forward(self, *args: Any, **kwds: Any) -> Any:
        return self.extract(*args, **kwds)
MuseV/MMCM/mmcm/data/general/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ from .items import Items
MuseV/MMCM/mmcm/data/general/items.py ADDED
@@ -0,0 +1,69 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from collections import UserList
2
+ from collections.abc import Iterable
3
+ from typing import Iterator, Any, List
4
+
5
+ from ...utils.util import convert_class_attr_to_dict
6
+
7
+ __all__ = ["Item", "Items"]
8
+
9
+
10
class Item(object):
    """Base class for map items that can serialize themselves to a dict."""

    def __init__(self) -> None:
        pass

    def to_dct(self, target_keys: List[str] = None, ignored_keys: List[str] = None):
        """Convert the item's attributes to a dict.

        Args:
            target_keys: keep only these attributes when given.
            ignored_keys: attribute name or list of names to drop, in
                addition to the always-ignored ``kwargs``.
        """
        skip = ["kwargs"]
        if isinstance(ignored_keys, list):
            skip.extend(ignored_keys)
        elif isinstance(ignored_keys, str):
            skip.append(ignored_keys)
        return convert_class_attr_to_dict(
            self, target_keys=target_keys, ignored_keys=skip
        )

    def preprocess(self):
        """Hook for subclasses; no-op by default."""
        pass
30
+
31
+
32
class Items(UserList):
    """A thin list wrapper for Item objects.

    A ``None`` payload becomes an empty list; a scalar payload is wrapped
    into a one-element list.
    """

    def __init__(
        self,
        data: Any = None,
    ):
        if data is None:
            items = []
        elif isinstance(data, list):
            items = data
        else:
            items = [data]
        super().__init__(items)

    def __len__(self):
        return len(self.data)

    def __getitem__(self, i):
        return self.data[i]

    def __delitem__(self, i):
        del self.data[i]

    def __setitem__(self, i, v):
        self.data[i] = v

    def insert(self, i, v):
        self.data.insert(i, v)

    def __str__(self):
        return str(self.data)

    def to_dct(self, target_keys: List[str] = None, ignored_keys: List[str] = None):
        """Serialize every contained item via its own ``to_dct``."""
        return [item.to_dct(target_keys, ignored_keys) for item in self.data]

    def __iter__(self) -> Iterator:
        return iter(self.data)

    def preprocess(self):
        """Hook for subclasses; no-op by default."""
        pass
MuseV/MMCM/mmcm/data/media_map/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ from .media_map import MetaInfo, MediaMap, MetaInfoList
MuseV/MMCM/mmcm/data/media_map/media_map.py ADDED
@@ -0,0 +1,393 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+ import bisect
3
+
4
+ import logging
5
+ from copy import deepcopy
6
+
7
+ from functools import partial
8
+ from typing import Any, Callable, Iterable, List, Union, Tuple, Dict
9
+
10
+ import numpy as np
11
+ from ..clip.clip_process import get_subseq_by_time
12
+ from ..clip.clip_stat import stat_clipseq_duration
13
+ from ..clip import Clip, ClipSeq, ClipIds, MatchedClipIds, MatchedClipIdsSeq
14
+ from .media_map_process import get_sub_mediamap_by_time
15
+ from ..emb import MediaMapEmb, H5pyMediaMapEmb
16
+ from ..general.items import Item, Items
17
+ from ...utils.data_util import pick_subdct
18
+ from ...utils.util import convert_class_attr_to_dict, load_dct_from_file
19
+
20
+ logger = logging.getLogger(__name__) # pylint: disable=invalid-name
21
+
22
+
23
+ __all__ = ["MetaInfo", "MetaInfoList", "MediaMap", "MediaMapSeq"]
24
+
25
+
26
class MetaInfo(Item):
    """Media-file-level metadata for a song/video (id, name, duration, paths).

    ``start``/``end`` may be given either in seconds or as a fraction of
    ``media_duration`` in [0, 1]; ``preprocess`` normalizes them to seconds.
    """

    def __init__(
        self,
        mediaid=None,
        media_name=None,
        media_duration=None,
        signature=None,
        media_path: str = None,
        media_map_path: str = None,
        start: float = None,
        end: float = None,
        ext=None,
        **kwargs,
    ):
        # BUG FIX: the original called ``super(MetaInfo).__init__()``, which
        # only initializes the unbound super proxy itself and never runs
        # ``Item.__init__``; use the zero-argument form instead.
        super().__init__()
        self.mediaid = mediaid
        self.media_name = media_name
        self.media_duration = media_duration
        self.signature = signature
        self.media_path = media_path
        self.media_map_path = media_map_path
        self.start = start
        self.end = end
        self.ext = ext
        # Any extra keyword arguments become plain attributes.
        self.__dict__.update(**kwargs)
        self.preprocess()

    def preprocess(self):
        """Normalize ``start``/``end`` to absolute seconds."""
        self.set_start_end()

    def set_start_end(self):
        """Resolve ``start``/``end``: None -> full range; values in [0, 1]
        are interpreted as fractions of ``media_duration``."""
        if self.start is None:
            self.start = 0
        elif self.start >= 0 and self.start <= 1:
            self.start = self.start * self.media_duration

        if self.end is None:
            self.end = self.media_duration
        elif self.end >= 0 and self.end <= 1:
            self.end = self.end * self.media_duration
68
+
69
+
70
class MetaInfoList(Items):
    """List of ``MetaInfo`` items; keeps per-source metadata when several
    songs/videos are cut together into one media map."""

    def __init__(self, items: Union[MetaInfo, List[MetaInfo]] = None):
        """
        Args:
            items (MetaInfo or List[MetaInfo], optional): one item or a list
                of them. Defaults to None (empty list).
        """
        if items is None:
            items = []
        else:
            items = items if isinstance(items, list) else [items]
        super().__init__(items)
        # BUG FIX: ``UserList`` stores its payload in ``self.data``;
        # ``self.items`` does not exist and raised AttributeError.
        self.meta_info_list = self.data
        if len(self.data) > 1:
            # NOTE(review): ``reset`` is not defined anywhere visible in this
            # hierarchy — presumably supplied by subclasses; confirm.
            self.reset()

    def __len__(self):
        return len(self.meta_info_list)

    def __getitem__(self, i) -> MetaInfo:
        return self.meta_info_list[i]

    @property
    def groupnum(self) -> int:
        """Number of source media files represented in this list."""
        return len(self.meta_info_list)
96
+
97
+
98
class MediaMap(object):
    """Base class for a media map (music / visual / rhythm-game "chart").

    Holds file-level ``MetaInfo`` plus clip sequences at several
    granularities. The attribute classes differ per media type, so subclasses
    may swap them for their own.
    """

    def __init__(
        self,
        meta_info: MetaInfo = None,
        clipseq: ClipSeq = None,
        stageseq: ClipSeq = None,
        frameseq: ClipSeq = None,
        emb: H5pyMediaMapEmb = None,
        **kwargs,
    ):
        """Store media-related information.

        Args:
            meta_info (MetaInfo): file-level metadata; extra ``kwargs`` are
                merged into it.
            clipseq (ClipSeq): clips sorted by clipid.
            stageseq (ClipSeq): coarser segmentation than ``clipseq`` (e.g.
                scene segments vs. shot segments).
            frameseq (ClipSeq): finer segmentation than ``clipseq``.
            emb (H5pyMediaMapEmb): embedding storage backing ``get_emb``.
        """
        self.meta_info = meta_info
        self.clipseq = clipseq
        self.frameseq = frameseq
        self.stageseq = stageseq
        self.emb = emb
        self.meta_info.__dict__.update(**kwargs)
        self.preprocess()

    def preprocess(
        self,
    ):
        """Trim head/tail if a sub-range was requested, then preprocess all
        sub-structures and cache the clip id range."""
        # NOTE(review): ``self.meta_info.end == 1`` looks like it was meant to
        # be ``!= 1`` (trim only when an explicit sub-range is requested);
        # kept as-is to preserve existing behavior — confirm with callers.
        if (self.meta_info.start != 0 and self.meta_info.start is not None) or (
            self.meta_info.end is not None and self.meta_info.end == 1
        ):
            self.drop_head_and_tail()
        self.meta_info.preprocess()
        if self.clipseq is not None:
            self.clipseq.preprocess()
        if self.frameseq is not None:
            self.frameseq.preprocess()
        if self.stageseq is not None:
            self.stageseq.preprocess()
        self.clip_start_idx = self.clipseq[0].clipid
        self.clip_end_idx = self.clipseq[-1].clipid

    def drop_head_and_tail(self) -> MediaMap:
        """Keep only clips inside [meta_info.start, meta_info.end]."""
        self.clipseq = get_subseq_by_time(
            self.clipseq,
            start=self.meta_info.start,
            end=self.meta_info.end,
            duration=self.meta_info.media_duration,
        )
        if self.stageseq is not None:
            # BUG FIX: the original sliced ``self.clipseq`` here, silently
            # replacing the stage sequence with a copy of the clip sequence.
            self.stageseq = get_subseq_by_time(
                self.stageseq,
                start=self.meta_info.start,
                end=self.meta_info.end,
                duration=self.meta_info.media_duration,
            )

    def set_clip_value(self, k, v):
        """Assign ``k = v`` on every clip in ``clipseq``.

        Args:
            k (str): field name on Clip.
            v (any): field value.
        """
        self.clipseq.set_clip_value(k, v)

    def spread_metainfo_2_clip(
        self, target_keys: List = None, ignored_keys: List = None
    ) -> None:
        """Broadcast selected meta_info fields onto every clip so that later
        clip-level processing can access them.

        Args:
            target_keys ([str]): fields to copy onto clips.
            ignored_keys ([str]): fields to skip.
        """
        dst = pick_subdct(
            self.meta_info.__dict__, target_keys=target_keys, ignored_keys=ignored_keys
        )
        for k, v in dst.items():
            self.set_clip_value(k, v)

    def spread_parameters(self, target_keys: list, ignored_keys) -> None:
        """Broadcast metadata onto clips, then let each clip propagate its own
        parameters."""
        self.spread_metainfo_2_clip(target_keys=target_keys, ignored_keys=ignored_keys)
        for clip in self.clipseq:
            clip.spread_parameters()

    def stat(
        self,
    ):
        """Print summary statistics of the map (currently clip durations)."""
        self.stat_clipseq_duration()

    def stat_clipseq_duration(
        self,
    ):
        """Print a histogram of clip durations."""
        hist, bin_edges = stat_clipseq_duration(self.clipseq)
        print(self.media_name, "bin_edges", bin_edges)
        print(self.media_name, "hist", hist)

    def to_dct(self, target_keys: list = None, ignored_keys: list = None):
        """Serialize the map to a dict; must be provided by subclasses."""
        raise NotImplementedError

    @property
    def duration(
        self,
    ):
        """Total duration covered by ``clipseq``."""
        return self.clipseq.duration

    @property
    def mediaid(
        self,
    ):
        return self.meta_info.mediaid

    @property
    def media_name(
        self,
    ):
        return self.meta_info.media_name

    @property
    def duration_seq_emb(self):
        return self.clipseq.duration_seq_emb

    @property
    def timestamp_seq_emb(self):
        return self.clipseq.timestamp_seq_emb

    @property
    def rela_timestamp_seq_emb(self):
        return self.clipseq.rela_timestamp_seq_emb

    def get_emb(self, key, idx=None):
        """Fetch embedding(s) for ``key``; ``idx`` is offset by the clip id of
        the first clip (None means all clips).

        # TODO: generalize this indexing scheme.
        """
        if idx is None:
            idx = range(self.clip_start_idx, self.clip_end_idx + 1)
        elif isinstance(idx, int):
            idx += self.clip_start_idx
        elif isinstance(idx, Iterable):
            idx = [x + self.clip_start_idx for x in idx]
        else:
            raise ValueError(
                f"idx only support None, int, Iterable, but given {idx},type is {type(idx)}"
            )
        return self.emb.get_value(key, idx=idx)

    def get_meta_info_attr(self, key: str) -> Any:
        """Read an attribute from ``meta_info`` by name."""
        return getattr(self.meta_info, key)

    @classmethod
    def from_json_path(
        cls, path: Dict, emb_path: str, media_path: str = None, **kwargs
    ) -> MediaMap:
        """Build a map from a serialized map file plus an h5py embedding file."""
        media_map = load_dct_from_file(path)
        emb = H5pyMediaMapEmb(emb_path)
        return cls.from_data(media_map, emb=emb, media_path=media_path, **kwargs)
263
+
264
+
265
class MediaMapSeq(Items):
    """A sequence of ``MediaMap`` objects that behaves like one merged map."""

    def __init__(self, maps: List[MediaMap]) -> None:
        super().__init__(maps)
        self.maps = self.data
        self.preprocess()
        # Per-map clip counts and a running sum starting at 0; used to convert
        # a global clip index into (map index, local clip index).
        self.each_map_clipseq_num = [len(m.clipseq) for m in self.maps]
        self.each_map_clipseq_num_cumsum = np.cumsum([0] + self.each_map_clipseq_num)

    @property
    def clipseq(self):
        """All clips of all maps concatenated, using the first map's type."""
        clipseq = []
        for m in self.maps:
            clipseq.extend(m.clipseq.data)
        return type(self.maps[0].clipseq)(clipseq)

    @property
    def stagesseq(self):
        """All stage segments of all maps concatenated."""
        # BUG FIX: MediaMap stores its stage segmentation as ``stageseq``;
        # the original read ``m.stagesseq`` and raised AttributeError.
        stagesseq = []
        for m in self.maps:
            stagesseq.extend(m.stageseq.data)
        return type(self.maps[0].stageseq)(stagesseq)

    @property
    def frameseq(self):
        """All frame segments of all maps concatenated."""
        frameseq = []
        for m in self.maps:
            frameseq.extend(m.frameseq.data)
        return type(self.maps[0].frameseq)(frameseq)

    def preprocess(self):
        for m in self.maps:
            m.preprocess()

    def _combine_str(
        self,
        attrs: List[str],
        sep: str = "|",
        single_maxlen: int = 10,
        total_max_length: int = 60,
    ) -> str:
        """Join per-map string attributes, truncating each and the total."""
        return sep.join([str(attr)[:single_maxlen] for attr in attrs])[
            :total_max_length
        ]

    def get_meta_info_attr(self, key: str, func: Callable) -> Any:
        """Collect ``meta_info.<key>`` from every map and reduce with ``func``."""
        attrs = [m.get_meta_info_attr(key) for m in self.maps]
        return func(attrs)

    @property
    def mediaid(self) -> str:
        return self.get_meta_info_attr(key="mediaid", func=self._combine_str)

    @property
    def media_name(self) -> str:
        return self.get_meta_info_attr(key="media_name", func=self._combine_str)

    @property
    def duration(self) -> float:
        return sum([m.duration for m in self.maps])

    @property
    def media_duration(self) -> float:
        return self.get_meta_info_attr(key="media_duration", func=sum)

    @classmethod
    def from_json_paths(
        cls,
        media_map_class: MediaMap,
        media_paths: str,
        media_map_paths: str,
        emb_paths: str,
        **kwargs,
    ) -> MediaMapSeq:
        """Build one map per (map path, emb path, media path) triple and wrap
        them in a sequence. ``media_map_class`` is the per-item map class."""
        map_seq = [
            media_map_class.from_json_path(
                path=media_map_paths[i],
                emb_path=emb_paths[i],
                media_path=media_paths[i],
                **kwargs,
            )
            for i in range(len(media_map_paths))
        ]
        return cls(map_seq)

    # TODO: implement mapseq stat func
    def stat(self):
        for m in self.maps:
            m.stat()

    def _combine_embs(self, embs):
        """Stack per-map embeddings along the clip axis."""
        return np.concatenate(embs, axis=0)

    @property
    def duration_seq_emb(self):
        embs = [m.duration_seq_emb for m in self.maps]
        return self._combine_embs(embs)

    @property
    def timestamp_seq_emb(self):
        embs = [m.timestamp_seq_emb for m in self.maps]
        return self._combine_embs(embs)

    @property
    def rela_timestamp_seq_emb(self):
        embs = [m.rela_timestamp_seq_emb for m in self.maps]
        return self._combine_embs(embs)

    def clip_idx_2_map_idx(self, idx):
        """Map a global clip index to (map index, clip index within that map)."""
        target_map_idx = bisect.bisect_right(self.each_map_clipseq_num_cumsum, idx)
        # bisect returns the slot after the matching cumsum entry; shift back
        # by one and clamp into the valid map range.
        target_map_idx = min(max(0, target_map_idx - 1), len(self.maps) - 1)
        target_map_clip_idx = idx - self.each_map_clipseq_num_cumsum[target_map_idx]
        return target_map_idx, target_map_clip_idx

    def get_emb(self, key: str, idx: Union[None, int, List[int]] = None) -> np.array:
        """Fetch embeddings addressed by global clip index (None means all)."""
        if idx is None:
            embs = [m.get_emb(key, idx=idx) for m in self.maps]
        else:
            if not isinstance(idx, list):
                idx = [idx]
            embs = []
            for c_idx in idx:
                target_map_idx, target_map_clip_idx = self.clip_idx_2_map_idx(c_idx)
                embs.append(
                    self.maps[target_map_idx].get_emb(key, int(target_map_clip_idx))
                )
        if len(embs) == 1:
            return embs[0]
        else:
            return self._combine_embs(embs)
MuseV/MMCM/mmcm/data/media_map/media_map_process.py ADDED
@@ -0,0 +1,72 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ from typing import List, Union, TYPE_CHECKING
4
+ from ..clip.clip_process import (
5
+ get_subseq_by_time,
6
+ find_time_by_stage,
7
+
8
+ )
9
+ if TYPE_CHECKING:
10
+ from ..media_map.media_map import MediaMap
11
+ from ..clip import Clip, ClipSeq
12
+
13
+
14
+ __all__ =[
15
+ "get_sub_mediamap_by_clip_idx",
16
+ "get_sub_mediamap_by_stage",
17
+ "get_sub_mediamap_by_time",
18
+ ]
19
+
20
+
21
+ def get_sub_mediamap_by_time(media_map:MediaMap, start: int=0, end:int=1, eps=1e-2) -> MediaMap:
22
+ """获取子片段序列,同时更新media_map中的相关信息
23
+
24
+ Args:
25
+ media_map (MediaInfo): _description_
26
+ start (float): 开始时间
27
+ end (float): 结束时间
28
+
29
+ Returns:
30
+ _type_: _description_
31
+ """
32
+ if start < 1:
33
+ start = media_map.duration * start
34
+ if end is None:
35
+ end = media_map.meta_info.media_duration
36
+ elif end <= 1:
37
+ end = media_map.duration * end
38
+ media_map.meta_info.start = start
39
+ media_map.meta_info.end = end
40
+ media_map.clipseq = get_subseq_by_time(
41
+ media_map.clipseq,
42
+ start=start,
43
+ end=end,
44
+ )
45
+ if media_map.stageseq is not None:
46
+ media_map.stageseq = get_subseq_by_time(media_map.stageseq, start=start, end=end)
47
+ return media_map
48
+
49
+
50
+ def get_sub_mediamap_by_clip_idx(media_map: MediaMap, start: int=None, end: int=None) -> MediaMap:
51
+ """不仅获取子片段序列,还要更新media_map中的相关信息
52
+
53
+ Args:
54
+ media_map (_type_): _description_
55
+ """
56
+ if start is None:
57
+ start = 0
58
+ if end is None:
59
+ end = -1
60
+ start = media_map.clipseq[start].time_start
61
+ end = media_map.clipseq[end].time_end
62
+ media_map = get_sub_mediamap_by_time(media_map=media_map, start=start, end=end)
63
+ return media_map
64
+
65
+
66
+ def get_sub_mediamap_by_stage(media_map: MediaMap, stages: Union[str, List[str]]) -> MediaMap:
67
+ if isinstance(stages, List):
68
+ stages = [stages]
69
+ start, _ = find_time_by_stage(media_map.stageseq, stages[0])
70
+ _, end = find_time_by_stage(media_map.stageseq, stages[-1])
71
+ media_map = get_sub_mediamap_by_time(media_map=media_map, start=start, end=end)
72
+ return media_map
MuseV/MMCM/mmcm/music/__init__.py ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ from .music_map.music_map import MusicMap, MusicMapSeq
2
+ from .music_map.music_clip import MusicClip, MusicClipSeq
3
+ from .music_map.meta_info import MusicMetaInfo
4
+ from .music_map.load_music_map import load_music_map
5
+
6
+ from .utils.path_util import get_audio_path_dct
MuseV/MMCM/mmcm/music/music_map/__init__.py ADDED
File without changes
MuseV/MMCM/mmcm/music/music_map/beat_map.py ADDED
@@ -0,0 +1,82 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+
3
+ from librosa.core.audio import get_duration
4
+
5
+ from ...data.clip.clip_process import insert_endclip, insert_startclip
6
+
7
+ from .clip_process import filter_clipseq_target_point
8
+ from .music_clip import MusicClip, MusicClipSeq
9
+
10
+
11
+ def beatnet2TMEType(beat: np.array, duration: float) -> MusicClipSeq:
12
+ """conver beatnet beat to tme beat type
13
+
14
+ Args:
15
+ beat (np.array): Nx2,
16
+ 1st column is time,
17
+ 2rd is type,
18
+ 0, end point
19
+ 1, strong beat
20
+ 2,3,4 weak beat
21
+ -1 lyric
22
+ duration (float): audio time length
23
+ Returns:
24
+ MusicClipSeq:
25
+ """
26
+ n = len(beat)
27
+ beat = np.insert(beat, 0, 0, axis=0)
28
+ beat = np.insert(beat, n + 1, [duration, 0], axis=0)
29
+ clips = []
30
+ for i in range(n + 1):
31
+ beat_type = int(beat[i + 1, 1])
32
+ clip = MusicClip(
33
+ time_start=beat[i, 0], # 开始时间
34
+ duration=round(beat[i + 1, 0] - beat[i, 0], 3), # 片段持续时间
35
+ clipid=i, # 片段序号,
36
+ timepoint_type=beat_type,
37
+ )
38
+ clips.append(clip)
39
+ clipseq = MusicClipSeq(clips=clips)
40
+ return clipseq
41
+
42
+
43
+ def generate_beatseq_with_beatnet(audio_path: str) -> np.array:
44
+ """使用beatnet生成beat序列
45
+
46
+ Args:
47
+ audio_path (str):
48
+ Returns:
49
+ np.array: beat序列 Nx2,
50
+ 1st column is time,
51
+ 2rd is type,
52
+ 0, end point
53
+ 1, strong beat
54
+ 2,3,4 weak beat
55
+ """
56
+ from BeatNet.BeatNet import BeatNet
57
+
58
+ estimator = BeatNet(1, mode="offline", inference_model="DBN", plot=[], thread=False)
59
+ output = estimator.process(audio_path=audio_path)
60
+ return output
61
+
62
+
63
+ def generate_music_map_with_beatnet(
64
+ audio_path: str, target: list = [0, 1]
65
+ ) -> MusicClipSeq:
66
+ """使用beatnet生成beat MusicClipseq
67
+
68
+ Args:
69
+ audio_path (str):
70
+ target (list, optional): 只保留相应的拍点. Defaults to [0, 1].
71
+
72
+ Returns:
73
+ MusicClipSeq: 返回的beat序列
74
+ beat: np.array, 原始的beat检测结果
75
+ """
76
+ output = generate_beatseq_with_beatnet(audio_path)
77
+ duration = get_duration(filename=audio_path)
78
+ clipseq = beatnet2TMEType(output, duration)
79
+ clipseq = insert_startclip(clipseq)
80
+ clipseq = insert_endclip(clipseq, duration)
81
+ clipseq = filter_clipseq_target_point(clipseq, target=target)
82
+ return clipseq, output
MuseV/MMCM/mmcm/music/music_map/clip_process.py ADDED
@@ -0,0 +1,196 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+ from typing import TYPE_CHECKING, Dict, List
3
+
4
+ import numpy as np
5
+
6
+ from ...data.clip.clip_process import find_idx_by_time, reset_clipseq_id
7
+ from ...data.clip.clip_fusion import fuse_clips
8
+ from ...utils.util import merge_list_continuous_same_element
9
+
10
+ if TYPE_CHECKING:
11
+ from .music_clip import MusicClip, MusicClipSeq
12
+ from .music_map import MusicMap, MusicMapSeq
13
+
14
+
15
+ # TODO: 待和clip操作做整合
16
+ def music_clip_is_short(clip: MusicClip, th: float = 3) -> bool:
17
+ """判断音乐片段是否过短
18
+
19
+ Args:
20
+ clip (MusicClip): 待判断的音乐片段
21
+ th (float, optional): 短篇的参数. Defaults to 3.
22
+
23
+ Returns:
24
+ bool: 是或不是 短片段
25
+ """
26
+ if clip.duration < th:
27
+ return False
28
+ else:
29
+ return True
30
+
31
+
32
def music_clip_timepoint_is_target(clip: MusicClip, target: list = [-1, 1, 0]) -> bool:
    """Check whether the clip's timepoint type intersects the target types.

    ``clip.timepoint_type`` is either a single int or a string such as
    ``"1_2"`` encoding several types joined by underscores.

    Args:
        clip (MusicClip): clip whose ``timepoint_type`` is inspected.
        target (list, optional): timepoint categories of interest.
            Defaults to [-1, 1, 0].

    Returns:
        bool: True when at least one of the clip's types is in ``target``.
    """
    raw = clip.timepoint_type
    if isinstance(raw, int):
        clip_types = {raw}
    else:
        clip_types = {int(part) for part in raw.split("_")}
    return bool(clip_types & set(target))
51
+
52
+
53
def filter_clipseq_target_point(
    clipseq: MusicClipSeq, target: list = [-1, 1, 0]
) -> MusicClipSeq:
    """Drop timepoints outside ``target`` and fuse the affected clips.

    Walks the sequence keeping a running ``start_clip``; whenever the next
    clip's timepoint is not a target type, the two clips are fused so only
    target timepoints survive as clip boundaries.

    Args:
        clipseq (MusicClipSeq): music clip sequence to filter.
        target (list, optional): timepoint types to keep. Defaults to [-1, 1, 0].

    Returns:
        MusicClipSeq: filtered sequence with clip ids reset.
    """
    n_clipseq = len(clipseq)
    if n_clipseq == 1:
        return clipseq
    newclipseq = []
    start_clip = clipseq[0]
    # ``has_start_clip`` tracks whether ``start_clip`` begins at a kept
    # (target-type) timepoint.
    if music_clip_timepoint_is_target(start_clip, target=target):
        has_start_clip = True
    else:
        has_start_clip = False
    i = 1
    while i <= n_clipseq - 1:
        clip = clipseq[i]
        start_clip_is_target = music_clip_timepoint_is_target(start_clip, target=target)
        next_clip_is_target = music_clip_timepoint_is_target(clip, target=target)
        if not has_start_clip:
            # No valid start yet: skip ahead; the current clip becomes the
            # candidate start if it begins at a target timepoint.
            start_clip = clip
            has_start_clip = next_clip_is_target
        else:
            if start_clip_is_target:
                has_start_clip = True
                if next_clip_is_target:
                    # Both boundaries kept: emit the accumulated clip.
                    newclipseq.append(start_clip)
                    start_clip = clip
                    if i == n_clipseq - 1:
                        newclipseq.append(clip)
                else:
                    # Next boundary dropped: merge the next clip into the
                    # accumulated one.
                    start_clip = fuse_clips(start_clip, clip)
                    if i == n_clipseq - 1:
                        newclipseq.append(start_clip)
            else:
                start_clip = clip
        i += 1
    newclipseq = reset_clipseq_id(newclipseq)
    return newclipseq
102
+
103
+
104
def merge_musicclip_into_clipseq(
    clip: MusicClip, clipseq: MusicClipSeq, th: float = 1
) -> MusicClipSeq:
    """Insert one music clip into a clip sequence unless the split would
    create a fragment shorter than ``th`` seconds.

    Only clips landing in non-lyric regions, or in lyric regions whose stage
    is a chorus ("C"), are inserted.

    Args:
        clip (MusicClip): clip to insert; its ``time_start`` drives placement.
        clipseq (MusicClipSeq): sequence to insert into (mutated in place).
        th (float, optional): minimum allowed fragment length. Defaults to 1.

    Returns:
        MusicClipSeq: the (possibly) extended sequence with clip ids reset.
    """
    n_clipseq = len(clipseq)  # NOTE(review): unused.
    clip_time = clip.time_start
    idx = find_idx_by_time(clipseq, clip_time)
    last_clip_time_start = clipseq[idx].time_start
    next_clip_time_start = clipseq[idx].time_start + clipseq[idx].duration
    # Lengths of the two fragments the insertion would create.
    last_clip_time_delta = clip_time - last_clip_time_start
    clip_duration = next_clip_time_start - clip_time
    # TODO: tune ``th`` for chorus parts to raise note density; waiting for
    # rhythm-game charts.
    # TODO: extract the business rule below into a dedicated function.
    # Only insert keypoints into non-lyric gaps or chorus sections.
    if clipseq[idx].text is None or (
        clipseq[idx].text is not None
        and clipseq[idx].stage is not None
        and "C" in clipseq[idx].stage
    ):
        if (last_clip_time_delta > th) and (clip_duration > th):
            clip.duration = clip_duration
            clipseq[idx].duration = last_clip_time_delta
            clipseq.insert(idx + 1, clip)
    clipseq = reset_clipseq_id(clipseq)
    return clipseq
138
+
139
+
140
def merge_music_clipseq(clipseq1: MusicClipSeq, clipseq2: MusicClipSeq) -> MusicClipSeq:
    """Merge every clip of ``clipseq2`` into ``clipseq1``.

    Each insertion goes through ``merge_musicclip_into_clipseq`` (which may
    skip clips that would create too-short fragments). ``clipseq2`` is
    consumed — it is empty when this returns.

    Args:
        clipseq1 (MusicClipSeq): target sequence receiving the clips.
        clipseq2 (MusicClipSeq): source sequence to merge (emptied).

    Returns:
        MusicClipSeq: the merged sequence.
    """
    while len(clipseq2) > 0:
        head = clipseq2[0]
        clipseq1 = merge_musicclip_into_clipseq(head, clipseq1)
        del clipseq2[0]
    return clipseq1
155
+
156
+
157
def merge_lyricseq_beatseq(
    lyric_clipseq: MusicClipSeq, beat_clipseq: MusicClipSeq
) -> MusicClipSeq:
    """Fuse a beat sequence into a lyric sequence.

    Args:
        lyric_clipseq (MusicClipSeq): lyric clip sequence.
        beat_clipseq (MusicClipSeq): beat clip sequence (consumed).

    Returns:
        MusicClipSeq: the fused music clip sequence.
    """
    return merge_music_clipseq(lyric_clipseq, beat_clipseq)
173
+
174
+
175
def get_stageseq_from_clipseq(clipseq: MusicClipSeq) -> List[Dict]:
    """Fuse neighbouring clips that share the same ``stage`` label.

    Args:
        clipseq (MusicClipSeq): clip sequence carrying per-clip stage labels.

    Returns:
        List[Dict]: stage-level segments, each with clipid, time span, stage
        label, original clip indices (inclusive on both ends, MSS-style) and
        duration.
    """
    labels = [clip.stage for clip in clipseq]
    segments = []
    for seg_id, span in enumerate(merge_list_continuous_same_element(labels)):
        first, last = span["start"], span["end"]
        seg = {
            "clipid": seg_id,
            "time_start": clipseq[first].time_start,
            "time_end": clipseq[last].time_end,
            "stage": span["element"],
            # MSS segments are closed on both ends.
            "original_clipid": list(range(first, last + 1)),
        }
        seg["duration"] = seg["time_end"] - seg["time_start"]
        segments.append(seg)
    return segments
MuseV/MMCM/mmcm/music/music_map/convert_type.py ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from ...data.clip.clip_process import (
2
+ insert_startclip,
3
+ insert_endclip,
4
+ reset_clipseq_id,
5
+ )
6
+
7
+ from .music_clip import MusicClip, MusicClipSeq
8
+
9
+
10
def read_osu_hitobjs(path: str) -> list:
    """Read an osu! beatmap file and return the raw [HitObjects] lines.

    Args:
        path (str): beatmap file path.

    Returns:
        list: stripped lines appearing after the ``[HitObjects]`` header.
    """
    hit_lines = []
    in_section = False
    with open(path, "r") as fp:
        for raw in fp:
            if in_section:
                hit_lines.append(raw.strip())
            elif "[HitObjects]" in raw:
                in_section = True
    return hit_lines
28
+
29
+
30
def osu2itech(src: list, duration: float = None) -> MusicClipSeq:
    """Convert an osu! beatmap into our target ``MusicClipSeq`` format.

    Args:
        src (list or str): beatmap file path, or the already-read
            [HitObjects] line strings.
        duration (float, optional): total song length in seconds. Defaults to None.

    Returns:
        MusicClipSeq: one clip per inter-hit interval.
    """
    if isinstance(src, str):
        src = read_osu_hitobjs(src)
    # osu! stores hit times in milliseconds in the 3rd comma-separated field.
    timepoints = [float(line.split(",")[2]) for line in src]
    clips = []
    for i in range(len(timepoints) - 1):
        clip = MusicClip(
            time_start=round(timepoints[i] / 1000, 3),
            timepoint_type=0,
            duration=round((timepoints[i + 1] - timepoints[i]) / 1000, 3),
            clipid=i,
        )
        clips.append(clip)
    if len(clips) > 0:
        # Pad the sequence to start at 0 and (when known) end at ``duration``.
        clips = insert_startclip(clips)
        if duration is not None:
            clips = insert_endclip(clips, duration=duration)
        clips = reset_clipseq_id(clips)
    return MusicClipSeq(clips)
MuseV/MMCM/mmcm/music/music_map/load_music_map.py ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+
3
+ from typing import List
4
+
5
+ from .music_map import MusicMap, MusicMapSeq
6
+
7
+
8
+ def load_music_map(
9
+ music_map_paths,
10
+ music_paths,
11
+ emb_paths,
12
+ start: float=None,
13
+ end: None=None,
14
+ target_stages: List[str] = None,
15
+ **kwargs,
16
+ ):
17
+ """读取视频谱面,转化成MusicInfo。当 musicinfo_path_lst 为列表时,表示多歌曲
18
+
19
+ Args:
20
+ musicinfo_path_lst (str or [str]): 视频谱面路径文件列表
21
+ music_path_lst (str or [str]): 视频文件路径文件列表,须与musicinfo_path_lst等长度
22
+
23
+
24
+ Returns:
25
+ MusicInfo: 视频谱面信息
26
+ """
27
+ dct ={
28
+ "start": start,
29
+ "end": end,
30
+ "target_stages": target_stages,
31
+ }
32
+ if isinstance(music_map_paths, list):
33
+ music_map = MusicMapSeq.from_json_paths(media_map_class=MusicMapSeq, media_paths=music_paths, media_map_paths=music_map_paths, emb_paths=emb_paths, **dct, **kwargs)
34
+ if len(music_map) == 1:
35
+ music_map = music_map[0]
36
+ else:
37
+ music_map = MusicMap.from_json_path(path=music_map_paths, emb_path=emb_paths, media_path=music_paths, **dct, **kwargs)
38
+ return music_map
MuseV/MMCM/mmcm/music/music_map/lyric_map.py ADDED
@@ -0,0 +1,149 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ from sklearn.preprocessing import normalize, minmax_scale
3
+ from scipy.signal import savgol_filter
4
+
5
+ # TODO:待更新音乐谱面的类信息
6
+ from ...data.clip.clip_process import (
7
+ complete_clipseq,
8
+ find_idx_by_clip,
9
+ insert_endclip,
10
+ insert_startclip,
11
+ reset_clipseq_id,
12
+ )
13
+
14
+ from .music_clip import Clip, ClipSeq
15
+ from .music_clip import MusicClipSeq
16
+ from .music_map import MusicMap
17
+
18
+
19
+ def generate_lyric_map(
20
+ path: str, duration: float = None, gap_th: float = 2
21
+ ) -> MusicClipSeq:
22
+ """从歌词文件中生成音乐谱面
23
+
24
+ Args:
25
+ path (str): 歌词文件路径
26
+ duration (float, optional): 歌词对应音频的总时长. Defaults to None.
27
+ gap_th (float, optional): 歌词中间的空白部分是否融合到上一个片段中. Defaults to 3.
28
+
29
+ Returns:
30
+ MusicClipSeq: 以歌词文件生成的音乐谱面
31
+ """
32
+ from ..music_map.lyric_process import lyricfile2musicinfo
33
+
34
+ lyric_info = lyricfile2musicinfo(path)
35
+ lyric_info = MusicMap(lyric_info, duration=duration)
36
+ clipseq = lyric_info.clipseq
37
+ lyric_info.meta_info.duration = duration
38
+ # set part of nonlyric as clip whose timepoint is 0
39
+ for i in range(len(clipseq)):
40
+ clipseq[i].timepoint_type = -1
41
+ lyric_info.clipseq = complete_clipseq(
42
+ clipseq=clipseq, duration=duration, gap_th=gap_th
43
+ )
44
+ return lyric_info
45
+
46
+
47
+ def insert_field_2_clipseq(clipseq: ClipSeq, reference: ClipSeq, field: str) -> ClipSeq:
48
+ """将reference中每个clip的字段信息根据赋给clipseq中最近的clip
49
+
50
+ Args:
51
+ clipseq (ClipSeq): 目标clip序列
52
+ reference (ClipSeq): 参考clip序列
53
+ field (str): 目标字段
54
+
55
+ Returns:
56
+ ClipSeq: 更新目标字段新值后的clip序列
57
+ """
58
+ for i, clip in enumerate(clipseq):
59
+ idx = find_idx_by_clip(reference, clip=clip)
60
+ if idx is not None:
61
+ if getattr(reference[idx], field) is not None:
62
+ clipseq[i].__dict__[field] = getattr(reference[idx], field)
63
+ return clipseq
64
+
65
+
66
+ def insert_rythm_2_clipseq(clipseq, reference):
67
+ """参考MSS字段的结构信息设置rythm信息。目前策略非常简单,主歌(Vx)0.25,副歌(Cx)0.75,其他为None
68
+
69
+ Args:
70
+ clipseq (ClipSeq): 目标clip序列,设置rythm字段
71
+ reference (ClipSeq): 参考clip序列,参考stage字段
72
+
73
+ Returns:
74
+ ClipSeq: 更新rythm字段新值后的clip序列
75
+ """
76
+
77
+ def stage2rythm(stage):
78
+ if "V" in stage:
79
+ return 0.25
80
+ elif "C" in stage:
81
+ return 0.75
82
+ else:
83
+ return None
84
+
85
+ for i, clip in enumerate(clipseq):
86
+ idx = find_idx_by_clip(reference, clip=clip)
87
+ if idx is not None:
88
+ if reference[idx].rythm is not None:
89
+ clipseq[i].rythm = stage2rythm(reference[idx].stage)
90
+ return clipseq
91
+
92
+
93
+ def insert_rythm_from_clip(clipseq: MusicClipSeq, beat: np.array) -> MusicClipSeq:
94
+ """给MusicClipSeq中的每个Clip新增节奏信息。目前使用
95
+ 1. 单位时间内的歌词数量特征, 使用 min-max 归一化到 0 - 1 之间
96
+ 2. 单位时间内的关键点数量,目前使用beatnet,使用 min-max 归一化到 0 - 1 之间
97
+ 3. 对1、2中的特征相加,并根据歌曲结构不同进行加权
98
+ Args:
99
+ clipseq (MusicClipSeq): 待处理的 MusicClipSeq
100
+ beat (np.array): beat检测结果,Nx2,,用于结算单位时间内的关键点数。
101
+ 1st column is time,
102
+ 2rd is type,
103
+ 0, end point
104
+ 1, strong beat
105
+ 2,3,4 weak beat
106
+
107
+ Returns:
108
+ MusicClipSeq: 新增 rythm 的 MusicClipSeq
109
+ """
110
+ mss_cofficient = {
111
+ "intro": 1.0,
112
+ "bridge": 1.0,
113
+ "end": 0.8,
114
+ "VA": 1.0,
115
+ "VB": 1.0,
116
+ "CA": 1.6,
117
+ "CB": 1.6,
118
+ }
119
+ # text_num_per_second
120
+ text_num_per_second_lst = [clip.tnps for clip in clipseq if clip.tnps != 0]
121
+ common_tnps = np.min(text_num_per_second_lst)
122
+ tnps = np.array([clip.tnps if clip.tnps != 0 else common_tnps for clip in clipseq])
123
+ tnps = minmax_scale(tnps)
124
+ # beat point _num_per_second
125
+ beat_pnps = np.zeros(len(clipseq))
126
+ for i, clip in enumerate(clipseq):
127
+ time_start = clip.time_start
128
+ time_end = clip.time_end
129
+ target_beat = beat[(beat[:, 0] >= time_start) & (beat[:, 0] < time_end)]
130
+ beat_pnps[i] = len(target_beat) / clip.duration
131
+ beat_pnps = minmax_scale(beat_pnps)
132
+
133
+ # cofficient
134
+ cofficients = np.array(
135
+ [
136
+ mss_cofficient[clip.stage]
137
+ if clip.stage in mss_cofficient and clip.stage is not None
138
+ else 1.0
139
+ for clip in clipseq
140
+ ]
141
+ )
142
+
143
+ rythm = cofficients * (tnps + beat_pnps)
144
+ rythm = minmax_scale(rythm)
145
+ rythm = savgol_filter(rythm, window_length=5, polyorder=3)
146
+ rythm = minmax_scale(rythm)
147
+ for i, clip in enumerate(clipseq):
148
+ clip.dynamic = rythm[i]
149
+ return clipseq
MuseV/MMCM/mmcm/music/music_map/lyric_process.py ADDED
@@ -0,0 +1,515 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from genericpath import isfile
2
+ import re
3
+ import os
4
+
5
+ from ...text.utils.read_text import read_xml2json
6
+
7
+
8
# A very handy regular-expression playground:
# https://regex101.com/r/cW8jA6/2


CHINESE_PATTERN = r"[\u4e00-\u9fff]+"
NOT_CHINESE_PATTERN = r"[^\u4e00-\u9fa5]"
ENGLISH_CHARACHTER_PATTERN = r"[a-zA-Z]+"
WORD_PATTERN = r"\w+"  # equal to [a-zA-Z0-9_].
NOT_WORD_PATTERN = r"\W+"
17
+
18
+
19
def has_target_string(lyric: str, pattern: str) -> bool:
    """Return True when the lyric line contains the target string.

    Args:
        lyric (str): lyric line to inspect.
        pattern (str): regular expression describing the target string.

    Returns:
        bool: whether at least one match of ``pattern`` occurs in ``lyric``.
    """
    return len(re.findall(pattern, lyric)) > 0
32
+
33
+
34
def has_chinese_char(lyric: str) -> bool:
    """Return True when the lyric contains at least one Chinese character.

    Args:
        lyric (str): lyric line to inspect.

    Returns:
        bool: whether any Chinese character occurs.
    """
    return has_target_string(lyric=lyric, pattern=CHINESE_PATTERN)
44
+
45
+
46
def has_non_chinese_char(lyric: str) -> bool:
    """Return True when the lyric contains any non-Chinese character.

    See https://git.woa.com/innovative_tech/CopyrightGroup/LyricTools/blob/master/lyric_tools/dataProcess.py#L53

    Args:
        lyric (str): lyric line to inspect.

    Returns:
        bool: whether any non-Chinese character occurs.
    """
    return has_target_string(lyric=lyric, pattern=NOT_CHINESE_PATTERN)
56
+
57
+
58
def has_english_alphabet_char(lyric: str) -> bool:
    """Return True when the lyric contains an English alphabet character.

    Args:
        lyric (str): lyric line to inspect.

    Returns:
        bool: whether any a-z / A-Z character occurs.
    """
    return has_target_string(lyric=lyric, pattern=ENGLISH_CHARACHTER_PATTERN)
68
+
69
+
70
def check_is_lyric_row(lyric: str) -> bool:
    """Return True when the given QRC row is an actual lyric line.

    Metadata rows are rejected: title/artist/album/by/offset tags,
    production-credit lines (composer, arranger, mixing, ...), copyright
    notices, and any row containing a half- or full-width colon.

    Args:
        lyric (str): candidate row.

    Returns:
        bool: True for a real lyric row, False for a metadata row.
    """
    metadata_patterns = (
        r"\[ti[::]?",
        r"\[ar[::]?",
        r"\[al[::]?",
        r"\[by[::]?",
        r"\[offset[::]?",
        r"词[::]?\(\d+,\d+\)[::]?",
        r"曲[::]?\(\d+,\d+\)[::]?",
        r"作\(\d+,\d+\)词[::]?",
        r"作\(\d+,\d+\)曲[::]?",
        r"演\(\d+,\d+\)唱[::]?",
        r"编\(\d+,\d+\)曲[::]?",
        r"吉\(\d+,\d+\)他[::]",
        r"人\(\d+,\d+\)声\(\d+,\d+\)录\(\d+,\d+\)音\(\d+,\d+\)师[::]?",
        r"人\(\d+,\d+\)声\(\d+,\d+\)录\(\d+,\d+\)音\(\d+,\d+\)棚[::]?",
        r"Vocal\s+\(\d+,\d+\)edite[::]?",
        r"混\(\d+,\d+\)音\(\d+,\d+\)/\(\d+,\d+\)母\(\d+,\d+\)带[::]?",
        r"混\(\d+,\d+\)音",
        r"和\(\d+,\d+\)声\(\d+,\d+\)编\(\d+,\d+\)写[::]?",
        r"词\(\d+,\d+\)版\(\d+,\d+\)权\(\d+,\d+\)管\(\d+,\d+\)理\(\d+,\d+\)方[::]?",
        r"曲\(\d+,\d+\)版\(\d+,\d+\)权\(\d+,\d+\)管\(\d+,\d+\)理\(\d+,\d+\)方[::]?",
        r"联\(\d+,\d+\)合\(\d+,\d+\)出\(\d+,\d+\)品[::]?",
        r"录\(\d+,\d+\)音\(\d+,\d+\)作\(\d+,\d+\)品",
        r"录\(\d+,\d+\)音\(\d+,\d+\)作\(\d+,\d+\)品\(\d+,\d+\)监\(\d+,\d+\)制[::]?",
        r"制\(\d+,\d+\)作\(\d+,\d+\)人[::]?",
        # duplicated pattern kept from the original list; harmless
        r"制\(\d+,\d+\)作\(\d+,\d+\)人[::]?",
        r"不\(\d+,\d+\)得\(\d+,\d+\)翻\(\d+,\d+\)唱",
        r"未\(\d+,\d+\)经\(\d+,\d+\)许\(\d+,\d+\)可",
        r"酷\(\d+,\d+\)狗\(\d+,\d+\)音\(\d+,\d+\)乐",
        r"[::]",
    )
    return not any(re.search(pattern, lyric) for pattern in metadata_patterns)
120
+
121
+
122
def lyric2clip(lyric: str) -> dict:
    """Convert one QRC lyric line into a clip dict.

    Clip definition:
    https://git.woa.com/innovative_tech/VideoMashup/blob/master/videomashup/media/clip.py

    Args:
        lyric (str): one QRC line, e.g.
            "[173247,3275]去(173247,403)吗(173649,677)".

    Returns:
        dict: clip dict carrying per-word sub-clips under "clips".
    """
    stamps = re.findall(r"\d+,\d+", lyric)
    # leading [start,duration] tag describes the whole line (milliseconds)
    line_start = round(int(stamps[0].split(",")[0]) / 1000, 3)
    line_dur = round(int(stamps[0].split(",")[-1]) / 1000, 3)
    line_end = line_start + line_dur
    # the last word may end earlier than the line tag claims
    last_start = round(int(stamps[-1].split(",")[0]) / 1000, 3)
    last_dur = round(int(stamps[-1].split(",")[-1]) / 1000, 3)
    last_end = last_start + last_dur
    actual_duration = min(line_end, last_end) - line_start
    lyric = re.sub(r"\[\d+,\d+\]", "", lyric)

    # split out each word's start time, end time and text
    words_with_timestamp = get_words_with_timestamp(lyric)

    lyric = re.sub(r"\(\d+,\d+\)", "", lyric)
    return {
        "time_start": line_start,
        "duration": actual_duration,
        "text": lyric,
        "original_text": lyric,
        "timepoint_type": -1,
        "clips": words_with_timestamp,
    }
154
+
155
+
156
# by yuuhong
def get_words_with_timestamp(lyric):
    """Split one QRC lyric line into per-word clip dicts.

    Example input:
        漫(17316,178)步(17494,174)走(17668,193)在(17861,183) (18044,0)莎(18044,153)玛(18197,159)丽(18356,176)丹(18532,200)

    Args:
        lyric: QRC line without the leading [start,duration] tag.

    Returns:
        list: dicts with "text", "time_start" and "duration" per word.
    """
    words_with_timestamp = []
    for chunk in lyric.split(")"):
        pieces = chunk.split("(")
        if len(pieces) != 2:
            continue
        word, stamp = pieces
        if re.match(r"\d+,\d+", stamp):
            # valid "start,duration" timestamp in milliseconds
            start = round(int(stamp.split(",")[0]) / 1000, 3)
            duration = round(int(stamp.split(",")[1]) / 1000, 3)
            words_with_timestamp.append(
                {"text": word, "time_start": start, "duration": duration}
            )
    return words_with_timestamp
177
+
178
+
179
def lyric2clips(lyric: str, th: float = 0.75) -> list:
    """Convert one lyric line into one or more clips.

    A Chinese line is split on spaces; if any resulting fragment is too
    short (duration <= th seconds) the whole line is kept as one clip.

    Args:
        lyric (str): such as [173247,3275]去(173247,403)吗(173649,677) 配(174326,189)吗(174516,593) 这(175108,279)
        th (float, optional): fragments at or below this duration trigger
            whole-line handling. Defaults to 0.75.

    Returns:
        list: sequence of lyric clip dicts.
    """
    # Only a Chinese line is split on spaces; any line containing English
    # letters is handled as a whole.
    if has_english_alphabet_char(lyric):
        return [lyric2clip(lyric)]
    splited_lyric = lyric.split(" ")
    if len(splited_lyric) == 1:
        return [lyric2clip(splited_lyric[0])]
    line_time_str, sub_lyric = re.split(r"]", splited_lyric[0])
    line_time_groups = re.findall(r"\d+,\d+", line_time_str)
    line_time_start = round(int(line_time_groups[0].split(",")[0]) / 1000, 3)
    line_duration = round(int(line_time_groups[0].split(",")[-1]) / 1000, 3)
    splited_lyric[0] = sub_lyric
    # In QRC the timestamp should directly follow each word, with spaces
    # after the timestamp; sometimes the space precedes the timestamp
    # instead, which is repaired here.
    # wrong: [173247,3275]去(173247,403)吗 (173649,677)配(174326,189)吗 (174516,593)这(175108,279)
    # wrong: [46122,2082]以(46122,213)身(46335,260)淬(46595,209)炼(46804,268)天(47072,250)地(47322,370)造(47692,341)化 (48033,172)
    # fixed: [173247,3275]去(173247,403)吗(173649,677) 配(174326,189)吗(174516,593) 这(175108,279)
    # NOTE(review): this loop mutates splited_lyric while indexing i and
    # i + 1; it assumes a fragment not ending in ")" is never last —
    # confirm inputs guarantee this, else i + 1 can raise IndexError.
    for i in range(len(splited_lyric)):
        if splited_lyric[i] == "":
            del splited_lyric[i]
            break
        if splited_lyric[i][-1] != ")":
            next_lyric_time_start = re.search(
                r"\(\d+,\d+\)", splited_lyric[i + 1]
            ).group(0)
            splited_lyric[i] += next_lyric_time_start
            splited_lyric[i + 1] = re.sub(
                next_lyric_time_start, "", splited_lyric[i + 1]
            )
            splited_lyric[i + 1] = re.sub("\(\)", "", splited_lyric[i + 1])
    lyric_text = re.sub(r"\[\d+,\d+\]", "", lyric)
    lyric_text = re.sub(r"\(\d+,\d+\)", "", lyric_text)
    clips = []
    has_short_clip = False
    for sub_lyric in splited_lyric:
        sub_lyric_groups = re.findall(r"\d+,\d+", sub_lyric)
        sub_lyric_1st_word_time_start = round(
            int(sub_lyric_groups[0].split(",")[0]) / 1000, 3
        )
        sub_lyric_last_word_time_start = round(
            int(sub_lyric_groups[-1].split(",")[0]) / 1000, 3
        )
        sub_lyric_last_word_duration = round(
            int(sub_lyric_groups[-1].split(",")[-1]) / 1000, 3
        )
        sub_lyric_last_word_time_end = (
            sub_lyric_last_word_time_start + sub_lyric_last_word_duration
        )
        sub_lyric_duration = (
            sub_lyric_last_word_time_end - sub_lyric_1st_word_time_start
        )
        if sub_lyric_duration <= th:
            has_short_clip = True
            break
        sub_lyric_text = re.sub(r"\[\d+,\d+\]", "", sub_lyric)
        sub_lyric_text = re.sub(r"\(\d+,\d+\)", "", sub_lyric_text)
        # keep the full-line text in original_text so each fragment retains
        # semantic context for downstream matching
        dct = {
            "time_start": sub_lyric_1st_word_time_start,
            "duration": sub_lyric_duration,
            "text": sub_lyric_text,
            "original_text": lyric_text,
            "timepoint_type": -1,
        }
        clips.append(dct)
    if has_short_clip:
        clips = [lyric2clip(lyric)]
    return clips
255
+
256
+
257
def is_songname(lyric: str) -> bool:
    """Return True for the song-title row, which carries a "ti" tag,
    e.g. "[ti:霍元甲 (《霍元甲》电影主题曲)]".

    Args:
        lyric (str): candidate row.

    Returns:
        bool: whether this row holds the song title.
    """
    return has_target_string(lyric, r"\[ti[::]?")
267
+
268
+
269
def get_songname(lyric: str) -> str:
    """Extract the song title from a row like "[ti:霍元甲 (《霍元甲》电影主题曲)]".

    Args:
        lyric (str): QRC row containing the title tag.

    Returns:
        str: the song title.
    """
    head = lyric.split("(")[0]
    # drop the leading "[ti:" (4 chars) and the trailing character
    return head[4:-1]
279
+
280
+
281
def is_album(lyric: str) -> bool:
    """Return True for the album row, e.g. "[al:霍元甲]".

    Args:
        lyric (str): candidate row.

    Returns:
        bool: whether this row holds the album name.
    """
    return has_target_string(lyric, r"\[al[::]?")
292
+
293
+
294
def get_album(lyric: str) -> str:
    """Extract the album name from a row like "[al:霍元甲]".

    Args:
        lyric (str): QRC row containing the album tag.

    Returns:
        str: the album name.
    """
    # drop the leading "[al:" (4 chars) and the trailing "]"
    return lyric[4:-1]
305
+
306
+
307
def is_singer(lyric: str) -> bool:
    """Return True for the singer row, e.g. "[ar:周杰伦]".

    Args:
        lyric (str): candidate row.

    Returns:
        bool: whether this row holds the singer name.
    """
    return has_target_string(lyric, r"\[ar[::]?")
317
+
318
+
319
def get_singer(lyric: str) -> str:
    """Extract the singer name from a row like "[ar:周杰伦]".

    Args:
        lyric (str): QRC row containing the singer tag.

    Returns:
        str: the singer name.
    """
    # drop the leading "[ar:" (4 chars) and the trailing "]"
    return lyric[4:-1]
329
+
330
+
331
def lyric2musicinfo(lyric: str) -> dict:
    """Convert QRC lyric content into a music-info dict.

    See https://git.woa.com/innovative_tech/VideoMashup/blob/master/videomashup/media/media_info.py#L19
    {
        "meta_info": {},
        "sub_meta_info": [],
        "clips": [
            clip
        ]
    }

    Args:
        lyric (str): lyric content from QRC.
            NOTE(review): despite the str annotation, the first statement
            indexes it as the dict produced by read_xml2json — confirm
            with callers.

    Returns:
        musicinfo: music map dict, see
        https://git.woa.com/innovative_tech/VideoMashup/blob/master/videomashup/media/media_info.py#L19
    """
    lyrics = lyric["QrcInfos"]["LyricInfo"]["Lyric_1"]["@LyricContent"]
    musicinfo = {
        "meta_info": {
            "mediaid": None,
            "media_name": None,
            "singer": None,
        },
        # NOTE(review): "sub_meata_info" looks like a typo of
        # "sub_meta_info"; kept as-is since consumers may rely on the key.
        "sub_meata_info": {},
        "clips": [],
    }
    # lyrics = [line.strip() for line in re.split(r"[\t\n\s+]", lyrics)]
    # re-prepend the "[" consumed by the split so every row keeps its tag
    lyrics = ["[" + line.strip() for line in re.split(r"\[", lyrics)]
    next_is_title_row = False
    lyric_clips = []
    for line in lyrics:
        if is_songname(line):
            musicinfo["meta_info"]["media_name"] = get_songname(line)
            continue
        if is_singer(line):
            musicinfo["meta_info"]["singer"] = get_singer(line)
            continue
        if is_album(line):
            musicinfo["meta_info"]["album"] = get_album(line)
            continue
        is_lyric_row = check_is_lyric_row(line)
        # the row right after the [offset...] tag is the title row; skip it
        if next_is_title_row:
            next_is_title_row = False
            continue
        # remove title row
        if not next_is_title_row and re.search(r"\[offset[::]", line):
            next_is_title_row = True
        if is_lyric_row and re.match(r"\[\d+,\d+\]", line):
            # whole line as one clip for meta lyric, split clips for clipseq
            lyric_clip = lyric2clip(line)
            lyric_clips.append(lyric_clip)
            clips = lyric2clips(line)
            musicinfo["clips"].extend(clips)
    musicinfo["meta_info"]["lyric"] = lyric_clips
    return musicinfo
386
+
387
+
388
def lrc_timestr2time(time_str: str) -> float:
    """Convert an LRC timestamp such as "00:00.00" into seconds.

    Args:
        time_str (str): "mm:ss.xx" timestamp text.

    Returns:
        float: seconds, rounded to millisecond precision.
    """
    minutes, seconds, millis = (float(piece) for piece in re.split(r"[:.]", time_str))
    return round(minutes * 60 + seconds + millis / 1000, 3)
399
+
400
+
401
def get_lrc_line_time(text: str, time_pattern: str) -> str:
    """Extract the timestamp of an LRC line, e.g. "[00:00.00]本字幕由天琴实验室独家AI字幕技术生成".

    Args:
        text (str): input LRC line.
        time_pattern (str): regex matching the timestamp text.

    Returns:
        The line start time in seconds (a float, despite the historical
        str annotation kept for interface compatibility).
    """
    stamp = re.search(time_pattern, text).group(0)
    return lrc_timestr2time(stamp)
413
+
414
+
415
def lrc_lyric2clip(lyric: str, time_pattern: str, duration: float) -> dict:
    """Convert one LRC line into a clip dict.

    Args:
        lyric (str): e.g. "[00:00.00]本字幕由天琴实验室独家AI字幕技术生成"
        time_pattern (str): timestamp regex, e.g. r"\d+:\d+\.\d+"
        duration (float): clip duration in seconds.

    Returns:
        dict: clip dict, definition at
        https://git.woa.com/innovative_tech/VideoMashup/blob/master/videomashup/media/clip.py
    """
    start = get_lrc_line_time(lyric, time_pattern=time_pattern)
    # strip the timestamp, then the two leftover bracket characters
    text = re.sub(time_pattern, "", lyric)[2:]
    return {
        "time_start": start,
        "duration": duration,
        "text": text,
        "timepoint_type": -1,
    }
437
+
438
+
439
def lrc2musicinfo(lyric: str, time_pattern: str = "\d+:\d+\.\d+") -> dict:
    """Convert lrc content (file path, raw text, or line list) into a music map dict.

    Args:
        lyric (str): lrc file path, raw lrc text, or an already-split list
            of lines (used by the recursive calls).
        time_pattern (str, optional): lrc timestamp regex. Defaults to "\d+:\d+\.\d+".

    Returns:
        dict: music map dict, definition at
        https://git.woa.com/innovative_tech/VideoMashup/blob/master/videomashup/music/music_info.py
    """
    if isinstance(lyric, str):
        # normalize both file-path and raw-text inputs to a list of lines,
        # then recurse into the list branch below
        if os.path.isfile(lyric):
            with open(lyric, "r") as f:
                lyric = [line.strip() for line in f.readlines()]
            return lrc2musicinfo(lyric)
        else:
            lyric = lyric.split("\n")
            return lrc2musicinfo(lyric)
    else:
        musicinfo = {
            "meta_info": {
                "mediaid": None,
                "media_name": None,
                "singer": None,
            },
            # NOTE(review): "sub_meata_info" looks like a typo of
            # "sub_meta_info"; kept because consumers may rely on the key.
            "sub_meata_info": {},
            "clips": [],
        }
        # lyrics = [line.strip() for line in re.split(r"[\t\n\s+]", lyrics)]
        # NOTE(review): lyric_clips is never appended to below, so
        # meta_info["lyric"] is always empty here — compare with
        # lyric2musicinfo; confirm this is intended.
        lyric_clips = []
        rows = len(lyric)
        for i, line in enumerate(lyric):
            if is_songname(line):
                musicinfo["meta_info"]["media_name"] = line[4:-1]
                continue
            if is_singer(line):
                musicinfo["meta_info"]["singer"] = line[4:-1]
                continue
            if is_album(line):
                musicinfo["meta_info"]["album"] = line[4:-1]
                continue
            if len(re.findall(time_pattern, line)) > 0:
                # duration is the gap to the next timestamped line; the
                # final line gets a fixed 1-second duration
                if i < rows - 1:
                    time_start = get_lrc_line_time(line, time_pattern=time_pattern)
                    next_line_time_start = get_lrc_line_time(
                        lyric[i + 1], time_pattern=time_pattern
                    )
                    duration = next_line_time_start - time_start
                else:
                    duration = 1
                clip = lrc_lyric2clip(
                    line, duration=duration, time_pattern=time_pattern
                )
                musicinfo["clips"].append(clip)
        musicinfo["meta_info"]["lyric"] = lyric_clips
        return musicinfo
495
+
496
+
497
def lyricfile2musicinfo(path: str) -> dict:
    """Convert a lyric file into a music map dict.

    Supported formats: QRC xml files and lrc files.
    TODO: support osu.

    Args:
        path (str): lyric file path.

    Returns:
        dict: music map dict, definition at
        https://git.woa.com/innovative_tech/VideoMashup/blob/master/videomashup/music/music_info.py

    Raises:
        ValueError: when the file extension is neither xml nor lrc.
    """
    # os.path.splitext is robust to dots inside the file name; the previous
    # `basename.split(".")` two-value unpack raised ValueError for names
    # like "song.v1.lrc".
    filename, ext = os.path.splitext(os.path.basename(path))
    ext = ext.lstrip(".").lower()
    if ext == "xml":
        musicinfo = lyric2musicinfo(read_xml2json(path))
    elif ext == "lrc":
        musicinfo = lrc2musicinfo(path)
    else:
        # previously fell through with musicinfo unbound -> NameError
        raise ValueError(f"unsupported lyric file type: {path}")
    musicinfo["meta_info"]["mediaid"] = filename
    return musicinfo
MuseV/MMCM/mmcm/music/music_map/meta_info.py ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ from ...data import MetaInfo
4
+
5
+
6
class MusicMetaInfo(MetaInfo):
    """Music-specific metadata: singer, genre, language and lyric path on top of MetaInfo."""

    def __init__(self, mediaid=None, media_name=None, media_duration=None, signature=None, media_path: str = None, media_map_path: str = None,
                 singer=None,
                 lyric_path=None,
                 genre=None,
                 language=None,
                 start: float = None, end: float = None, ext=None, **kwargs):
        """Store music metadata; base fields are forwarded positionally to MetaInfo."""
        super().__init__(mediaid, media_name, media_duration, signature, media_path, media_map_path, start, end, ext, **kwargs)
        # music-only fields
        self.singer = singer
        self.genre = genre
        self.language = language
        self.lyric_path = lyric_path

    @classmethod
    def from_data(cls, data) -> MusicMetaInfo:
        """Build a MusicMetaInfo from a plain keyword dict."""
        return MusicMetaInfo(**data)
MuseV/MMCM/mmcm/music/music_map/mss_map.py ADDED
@@ -0,0 +1,185 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import logging
2
+
3
+ from .music_clip import MusicClip, MusicClipSeq
4
+ from .music_map import MusicMap
5
+ from ...data.clip.clip_process import find_idx_by_time
6
+
7
+ logger = logging.getLogger(__name__) # pylint: disable=invalid-name
8
+
9
+
10
def insert_mss_2_clipseq(
    clipseq: MusicClipSeq, mss_clipseq: MusicClipSeq
) -> MusicClipSeq:
    """Copy the song-structure ``stage`` field from mss clips onto clipseq.

    Each target clip receives the stage of the mss clip covering its start
    time; clips with no covering mss clip get "unknow" (sic).

    Args:
        clipseq (MusicClipSeq): target clip sequence, modified in place.
        mss_clipseq (MusicClipSeq): reference clip sequence with stages.

    Returns:
        MusicClipSeq: the same clipseq with stages filled in.
    """
    for idx, target_clip in enumerate(clipseq):
        ref_idx = find_idx_by_time(mss_clipseq, target_clip.time_start)
        clipseq[idx].stage = (
            mss_clipseq[ref_idx].stage if ref_idx is not None else "unknow"
        )
    return clipseq
30
+
31
+
32
def get_mss_musicinfo(songid: str) -> MusicMap:
    """Fetch Tianqin song-structure (mss) info via media_data and wrap it.

    Args:
        songid (str): song id.

    Returns:
        MusicMap: music map built from the mss data, or None when the
        lookup or import fails.
    """
    mss = None
    try:
        from media_data.oi.tianqin_database import get_mss

        mss = get_mss(songid=songid)
    except Exception as e:
        # best-effort: log and fall through with mss = None
        logger.warning("get mss failed, mss={}".format(songid))
        logger.exception(e)
    return MusicMap(mss) if mss is not None else None
51
+
52
+
53
def merge_mss(musicinfo: MusicMap, mss: MusicMap) -> MusicMap:
    """Merge an mss music map into a target music map.

    Copies the bpm and, when the mss map has clips, spreads its stage
    labels onto the target's clip sequence.

    Args:
        musicinfo (MusicMap): target music map, modified in place.
        mss (MusicMap): mss music map to merge in.

    Returns:
        MusicMap: the merged target map.
    """
    musicinfo.meta_info.bpm = mss.meta_info.bpm
    has_mss_clips = len(mss.clipseq) > 0
    if has_mss_clips:
        musicinfo.clipseq = insert_mss_2_clipseq(musicinfo.clipseq, mss.clipseq)
    return musicinfo
67
+
68
+
69
def generate_mss_from_lyric(lyrics: list, audio_duration: float, th=8) -> MusicClipSeq:
    """Build a coarse song-structure clip sequence from lyric clips.

    Emits "intro" before the first lyric, "end" after the last,
    "bridge" for inter-lyric gaps of at least ``th`` seconds, and
    "lyric" filler segments covering the remaining sung spans, then
    returns everything sorted by start time.

    Args:
        lyrics (list): lyric clip dicts with "time_start" and "duration".
        audio_duration (float): total audio length in seconds.
        th (int, optional): minimum gap treated as a bridge. Defaults to 8.

    Returns:
        MusicClipSeq: time-sorted stage clip sequence.
    """
    # stage vocabulary elsewhere: "intro", "VA", "CA", "bridge", "VB", "CB", "end"
    mss = []
    n_lyric = len(lyrics)
    for lyric_idx, line_lyric_dct in enumerate(lyrics):
        time_start = line_lyric_dct["time_start"]
        duration = line_lyric_dct["duration"]
        time_end = time_start + duration
        # text = line_lyric_dct["text"]
        if lyric_idx == 0:
            # everything before the first lyric line is the intro
            sub_mss = {
                "stage": "intro",
                "time_start": 0,
                "duration": time_start,
            }
            mss.append(sub_mss)
            continue
        if lyric_idx == n_lyric - 1:
            # everything after the last lyric line is the ending
            sub_mss = {
                "stage": "end",
                "time_start": time_end,
                "duration": audio_duration - time_end,
            }
            mss.append(sub_mss)
            continue

        # a long silence between consecutive lyric lines is a bridge
        if lyrics[lyric_idx + 1]["time_start"] - time_end >= th:
            sub_mss = {
                "stage": "bridge",
                "time_start": time_end,
                "duration": lyrics[lyric_idx + 1]["time_start"] - time_end,
            }
            mss.append(sub_mss)
    # fill remaining gaps between structural segments with "lyric" spans
    mss_lyric = []
    for sub_idx, sub_mss in enumerate(mss):
        if sub_idx == len(mss) - 1:
            continue
        time_end = sub_mss["time_start"] + sub_mss["duration"]
        next_time_start = mss[sub_idx + 1]["time_start"]
        if next_time_start - time_end > 0.1:
            mss_lyric.append(
                {
                    "stage": "lyric",
                    "time_start": time_end,
                    "duration": next_time_start - time_end,
                }
            )
    mss.extend(mss_lyric)
    mss = sorted(mss, key=lambda x: x["time_start"])
    mss = MusicClipSeq(mss)
    return mss
120
+
121
+
122
def refine_mss_info_from_tianqin(
    mss_info: MusicMap, lyricseq: MusicClipSeq
) -> MusicMap:
    """Refine Tianqin song-structure info into a time-complete sequence.

    Before: Tianqin structure only labels individual lyric lines; segments
    are not contiguous, so the timeline of the whole song is incomplete.
    After: intro/bridge/end segments are added, adjacent segments with the
    same stage are merged, and the result is contiguous in time.

    Args:
        mss_info (MusicMap): Tianqin song structure.
        lyricseq (ClipSeq): original lyric info used to compute intro,
            bridge and end (could also be derived from mss_info).

    Returns:
        MusicMap: music map with the refined stage clip sequence.
    """
    lyric_mss_clipseq = generate_mss_from_lyric(
        lyricseq, audio_duration=mss_info.meta_info.duration
    )
    new_mss_clipseq = []
    # lyric_mss_dct = lyric_mss_clipseq.to_dct()
    # mss_dct = mss_info.clipseq.to_dct()
    for l_clip_idx, lyric_clip in enumerate(lyric_mss_clipseq):
        if lyric_clip.stage != "lyric":
            # intro/bridge/end segments pass through unchanged
            new_mss_clipseq.append(lyric_clip)
        else:
            # walk the Tianqin clips inside this "lyric" span and emit one
            # merged segment per run of identical stages
            new_clip_time_start = lyric_clip.time_start
            last_stage = "ANewClipStart"  # sentinel: no stage seen yet
            for clip_idx, clip in enumerate(mss_info.clipseq):
                if clip.time_start < new_clip_time_start:
                    continue
                if (
                    clip.time_start >= lyric_mss_clipseq[l_clip_idx + 1].time_start
                    or clip_idx == len(mss_info.clipseq) - 1
                ):
                    # reached the end of this lyric span (or the very last
                    # Tianqin clip): close out the current run
                    if clip.time_start >= lyric_mss_clipseq[l_clip_idx + 1].time_start:
                        stage = last_stage
                    # e.g. a song whose final lyric section has a single line
                    if clip_idx == len(mss_info.clipseq) - 1:
                        stage = clip.stage
                    new_clip_time_end = lyric_mss_clipseq[l_clip_idx + 1].time_start
                    new_stage_clip = {
                        "time_start": new_clip_time_start,
                        "duration": new_clip_time_end - new_clip_time_start,
                        "stage": stage,
                    }
                    new_mss_clipseq.append(MusicClip(**new_stage_clip))
                    new_clip_time_start = new_clip_time_end
                    last_stage = clip.stage
                    break
                if clip.stage != last_stage:
                    if last_stage == "ANewClipStart":
                        last_stage = clip.stage
                        continue
                    # stage changed: emit the finished run as one segment
                    new_clip_time_end = mss_info.clipseq[clip_idx].time_start
                    new_stage_clip = {
                        "time_start": new_clip_time_start,
                        "duration": new_clip_time_end - new_clip_time_start,
                        "stage": last_stage,
                    }
                    new_mss_clipseq.append(MusicClip(**new_stage_clip))
                    new_clip_time_start = new_clip_time_end
                    last_stage = clip.stage
    new_mss_clipseq = MusicClipSeq(sorted(new_mss_clipseq, key=lambda x: x.time_start))
    mss_info.clipseq = new_mss_clipseq
    return mss_info
MuseV/MMCM/mmcm/music/music_map/music_clip.py ADDED
@@ -0,0 +1,83 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+ from typing import Dict, List
3
+
4
+ from ...data.clip import Clip, ClipSeq
5
+
6
+
7
class MusicClip(Clip):
    """Clip specialized for music: adds lyric-text counting helpers."""

    def __init__(self, time_start: float, duration: float, clipid: int = None, media_type: str = None, mediaid: str = None, timepoint_type: str = None, text: str = None, stage: str = None, path: str = None, duration_num: int = None, similar_clipseq: MatchedClipIds = None, dynamic: float = None, **kwargs):
        """Forward all fields positionally to the base Clip."""
        super().__init__(time_start, duration, clipid, media_type, mediaid, timepoint_type, text, stage, path, duration_num, similar_clipseq, dynamic, **kwargs)

    @property
    def text_num(self):
        # number of space-separated tokens in `text`
        return self._cal_text_num()

    @property
    def original_text_num(self):
        # number of space-separated tokens in `original_text`
        return self._cal_text_num(text_mode=1)

    def _cal_text_num(self, text_mode: int = 0) -> int:
        """Count text tokens.

        Args:
            text_mode (int, optional): 0 counts ``text``; anything else
                counts ``original_text``. Defaults to 0.

        Returns:
            int: token count (0 when the text is None).
                NOTE(review): counting splits on spaces, so unspaced
                Chinese text counts as a single token — confirm intended.
        """
        if text_mode == 0:
            text = self.text
        else:
            text = self.original_text
        if text is None:
            n_text = 0
        else:
            text = text.strip().split(" ")
            n_text = len(text)
        return n_text

    @property
    def text_num_per_second(self):
        """Token count of ``text`` per second of clip duration."""
        return self._cal_text_num_per_second(mode=0)

    @property
    def original_text_num_per_second(self):
        """Token count of ``original_text`` per second of clip duration."""
        return self._cal_text_num_per_second(mode=1)

    @property
    def tnps(self):
        """Shorthand for text_num_per_second."""
        return self.text_num_per_second

    @property
    def original_tnps(self):
        """Shorthand for original_text_num_per_second."""
        return self.original_text_num_per_second

    def _cal_text_num_per_second(self, mode=0):
        """Token count divided by clip duration (mode 0: text, else original_text)."""
        text_num = self.text_num if mode == 0 else self.original_text_num
        return text_num / self.duration

    @classmethod
    def from_data(cls, data: Dict):
        """Build a MusicClip from a plain keyword dict."""
        return MusicClip(**data)
67
+
68
+
69
class MusicClipSeq(ClipSeq):
    """Sequence of MusicClip items; ``clipseq`` aliases the inherited data list."""

    def __init__(self, items: List[Clip] = None):
        super().__init__(items)
        # music-flavored alias for the list held by the base class
        self.clipseq = self.data

    @classmethod
    def from_data(cls, clipseq: List[Dict]) -> MusicClipSeq:
        """Build a MusicClipSeq from a list of clip dicts."""
        return MusicClipSeq([MusicClip.from_data(item) for item in clipseq])
83
+
MuseV/MMCM/mmcm/music/music_map/music_map.py ADDED
@@ -0,0 +1,140 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+ from typing import List, Dict
3
+
4
+ from moviepy.editor import concatenate_audioclips, AudioClip, AudioFileClip
5
+
6
+ from ...data import MediaMap, MediaMapEmb, MetaInfo, MediaMapSeq
7
+ from ...data.clip.clip_process import find_time_by_stage
8
+ from ...data.emb.h5py_emb import H5pyMediaMapEmb
9
+ from ...utils.util import load_dct_from_file
10
+
11
+ from .clip_process import get_stageseq_from_clipseq
12
+ from .music_clip import MusicClip, MusicClipSeq
13
+ from .meta_info import MusicMetaInfo
14
+
15
+
16
class MusicMap(MediaMap):
    """Music media map: clip sequence plus lyric and stage (structure) sequences."""

    def __init__(
        self,
        meta_info: MetaInfo,
        clipseq: MusicClipSeq,
        lyricseq: MusicClipSeq = None,
        stageseq: MusicClipSeq = None,
        frameseq: MusicClipSeq = None,
        emb: MediaMapEmb = None,
        **kwargs,
    ):
        # set before super().__init__ since base init may run preprocess
        self.lyricseq = lyricseq
        super().__init__(meta_info, clipseq, stageseq, frameseq, emb, **kwargs)
        if self.stageseq is None:
            # derive the stage sequence from clip stages when none was given
            self.stageseq = MusicClipSeq.from_data(
                get_stageseq_from_clipseq(self.clipseq)
            )
            self.stageseq.preprocess()

    def preprocess(self):
        """Apply target_stages (when set), base preprocessing, and spread meta info to clips."""
        if (
            hasattr(self.meta_info, "target_stages")
            and self.meta_info.target_stages is not None
        ):
            self.set_start_end_by_target_stages()
        super().preprocess()
        self.spread_metainfo_2_clip(
            target_keys=[
                "media_path",
                "media_map_path",
                "emb_path",
                "media_duration",
                "mediaid",
                "media_name",
                "emb",
            ]
        )

    def set_start_end_by_target_stages(self):
        """Set meta_info.start/end from the first/last target stage boundaries."""
        target_stages = self.meta_info.target_stages
        if not isinstance(target_stages, List):
            target_stages = [target_stages]
        start, _ = find_time_by_stage(self.stageseq, target_stages[0])
        _, end = find_time_by_stage(self.stageseq, target_stages[-1])
        self.meta_info.start = start
        self.meta_info.end = end

    @property
    def audio_clip(self) -> AudioFileClip:
        """Load the audio behind this map, trimmed to [start, end].

        Returns:
            AudioClip: moviepy audio clip.
        """
        audio_clip = AudioFileClip(self.meta_info.media_path)
        audio_clip = audio_clip.subclip(self.meta_info.start, self.meta_info.end)
        return audio_clip

    @classmethod
    def from_json_path(
        cls, path: Dict, emb_path: str, media_path: str = None, **kwargs
    ) -> MusicMap:
        """Load a map dict from file plus its h5py embedding, then build a MusicMap."""
        media_map = load_dct_from_file(path)
        emb = H5pyMediaMapEmb(emb_path)
        return cls.from_data(media_map, emb=emb, media_path=media_path, **kwargs)

    @classmethod
    def from_data(
        cls, data: Dict, emb: H5pyMediaMapEmb, media_path: str = None, **kwargs
    ) -> MusicMap:
        """Build a MusicMap from a map dict; extra keys pass through as kwargs."""
        meta_info = MusicMetaInfo.from_data(data.get("meta_info", {}))
        meta_info.media_path = media_path
        clipseq = MusicClipSeq.from_data(data.get("clipseq", []))
        stageseq = MusicClipSeq.from_data(data.get("stageseq", []))
        lyricseq = MusicClipSeq.from_data(data.get("lyricseq", []))
        # forward every key not consumed above
        target_keys = ["meta_info", "clipseq", "frameseq", "stageseq", "lyricseq"]
        dct = {k: data[k] for k in data.keys() if k not in target_keys}
        dct.update(**kwargs)
        video_map = MusicMap(
            meta_info=meta_info,
            clipseq=clipseq,
            stageseq=stageseq,
            lyricseq=lyricseq,
            emb=emb,
            **dct,
        )
        return video_map

    def to_dct(
        self, target_keys: List[str] = None, ignored_keys: List[str] = None
    ) -> Dict:
        """Serialize the map (meta info and all sequences) to a plain dict."""
        dct = {}
        dct["meta_info"] = self.meta_info.to_dct(
            target_keys=target_keys, ignored_keys=ignored_keys
        )
        dct["clipseq"] = self.clipseq.to_dct(
            target_keys=target_keys, ignored_keys=ignored_keys
        )
        if self.frameseq is not None:
            dct["frameseq"] = self.frameseq.to_dct(
                target_keys=target_keys, ignored_keys=ignored_keys
            )
        else:
            dct["frameseq"] = None
        if self.stageseq is not None:
            dct["stageseq"] = self.stageseq.to_dct(
                target_keys=target_keys, ignored_keys=ignored_keys
            )
        else:
            dct["stageseq"] = None
        dct["lyricseq"] = self.lyricseq.to_dct(
            target_keys=target_keys, ignored_keys=ignored_keys
        )
        return dct
130
+
131
+
132
class MusicMapSeq(MediaMapSeq):
    """Ordered sequence of MusicMap objects."""

    def __init__(self, maps: List[MusicMap]) -> None:
        super().__init__(maps)

    @property
    def audio_clip(self) -> AudioFileClip:
        """Concatenate the audio clips of all contained maps in order.

        Returns:
            AudioFileClip: moviepy audio clip spanning every map.
        """
        # fix: the original read the non-existent attribute `audi_clip`,
        # which raised AttributeError on first access
        audio_clip_lst = [m.audio_clip for m in self.maps]
        audio_clip = concatenate_audioclips(audio_clip_lst)
        return audio_clip
MuseV/MMCM/mmcm/music/music_map/music_map_demp.py ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from moviepy.editor import (
2
+ ColorClip,
3
+ concatenate_videoclips,
4
+ AudioFileClip,
5
+ CompositeVideoClip,
6
+ )
7
+
8
+ from ...vision.video_map.video_lyric import render_lyric2video
9
+ from ...vision.video_map.video_writer import write_videoclip
10
+ from .music_map import MusicMap
11
+
12
+
13
def generate_music_map_videodemo(
    music_map: MusicMap,
    path: str,
    audio_path: str,
    render_lyric: bool = True,
    width: int = 360,
    height: int = 240,
    fps: int = 25,
    n_thread: int = 8,
    colors: list = None,
) -> None:
    """Render a music map as a demo video of solid-color transition clips.

    Each clip of ``music_map`` becomes a full-frame color block (colors are
    cycled clip by clip), the blocks are concatenated, the lyric stored in the
    map is optionally rendered on top, the source audio is attached, and the
    result is written to ``path``.

    Args:
        music_map (MusicMap): music map to visualize.
        path (str): output path of the rendered demo video.
        audio_path (str): path of the audio the music map describes.
        render_lyric (bool, optional): whether to overlay the lyric contained
            in the music map. Defaults to True.
        width (int, optional): output video width. Defaults to 360.
        height (int, optional): output video height. Defaults to 240.
        fps (int, optional): output video frame rate. Defaults to 25.
        n_thread (int, optional): number of video-writer threads. Defaults to 8.
        colors (list, optional): RGB colors cycled across clips. Defaults to
            [[51, 161, 201], [46, 139, 87]].
    """
    # Avoid a shared mutable default argument; materialize the default here.
    if colors is None:
        colors = [[51, 161, 201], [46, 139, 87]]
    audio_clip = AudioFileClip(audio_path)
    size = (width, height)
    # One solid-color block per music clip, matching that clip's duration.
    color_blocks = [
        ColorClip(size=size, color=colors[i % len(colors)], duration=clip.duration)
        for i, clip in enumerate(music_map.clipseq)
    ]
    demo_clip = concatenate_videoclips(color_blocks, method="compose")
    if render_lyric:
        demo_clip = render_lyric2video(
            videoclip=demo_clip,
            lyric=music_map,
            lyric_info_type="music_map",
        )
    demo_clip = demo_clip.set_audio(audio_clip)
    write_videoclip(
        demo_clip,
        path=path,
        fps=fps,
        n_thread=n_thread,
    )
MuseV/MMCM/mmcm/music/utils/__init__.py ADDED
File without changes
MuseV/MMCM/mmcm/music/utils/path_util.py ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from typing import Dict, Tuple
3
+
4
+ from ...utils.path_util import get_dir_file_map
5
+
6
+
7
def get_audio_path_dct(path: str, exts=None) -> Dict[str, str]:
    """Walk ``path`` and its subdirectories and map audio file names to paths.

    Args:
        path (str): root directory to scan recursively.
        exts (list, optional): audio file extensions to include.
            Defaults to ["mp3", "flac", "wav"].

    Returns:
        Dict[str, str]: mapping produced by :func:`get_dir_file_map`.
    """
    # None-sentinel avoids the shared mutable default-argument pitfall.
    if exts is None:
        exts = ["mp3", "flac", "wav"]
    return get_dir_file_map(path, exts=exts)
MuseV/MMCM/mmcm/t2p/.gitignore ADDED
@@ -0,0 +1,158 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Byte-compiled / optimized / DLL files
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+
6
+ # C extensions
7
+ *.so
8
+
9
+ # Distribution / packaging
10
+ .Python
11
+ build/
12
+ develop-eggs/
13
+ dist/
14
+ downloads/
15
+ eggs/
16
+ .eggs/
17
+ lib/
18
+ lib64/
19
+ parts/
20
+ sdist/
21
+ var/
22
+ wheels/
23
+ pip-wheel-metadata/
24
+ share/python-wheels/
25
+ *.egg-info/
26
+ .installed.cfg
27
+ *.egg
28
+ MANIFEST
29
+
30
+ # PyInstaller
31
+ # Usually these files are written by a python script from a template
32
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
33
+ *.manifest
34
+ *.spec
35
+
36
+ # Installer logs
37
+ pip-log.txt
38
+ pip-delete-this-directory.txt
39
+
40
+ # Unit test / coverage reports
41
+ htmlcov/
42
+ .tox/
43
+ .nox/
44
+ .coverage
45
+ .coverage.*
46
+ .cache
47
+ nosetests.xml
48
+ coverage.xml
49
+ *.cover
50
+ *.py,cover
51
+ .hypothesis/
52
+ .pytest_cache/
53
+
54
+ # Translations
55
+ *.mo
56
+ *.pot
57
+
58
+ # Django stuff:
59
+ *.log
60
+ local_settings.py
61
+ db.sqlite3
62
+ db.sqlite3-journal
63
+
64
+ # Flask stuff:
65
+ instance/
66
+ .webassets-cache
67
+
68
+ # Scrapy stuff:
69
+ .scrapy
70
+
71
+ # Sphinx documentation
72
+ docs/_build/
73
+
74
+ # PyBuilder
75
+ target/
76
+
77
+ # Jupyter Notebook
78
+ .ipynb_checkpoints
79
+
80
+ # IPython
81
+ profile_default/
82
+ ipython_config.py
83
+
84
+ # pyenv
85
+ .python-version
86
+
87
+ # pipenv
88
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
89
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
90
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
91
+ # install all needed dependencies.
92
+ #Pipfile.lock
93
+
94
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow
95
+ __pypackages__/
96
+
97
+ # Celery stuff
98
+ celerybeat-schedule
99
+ celerybeat.pid
100
+
101
+ # SageMath parsed files
102
+ *.sage.py
103
+
104
+ # Environments
105
+ .env
106
+ .venv
107
+ env/
108
+ venv/
109
+ ENV/
110
+ env.bak/
111
+ venv.bak/
112
+
113
+ # Spyder project settings
114
+ .spyderproject
115
+ .spyproject
116
+
117
+ # Rope project settings
118
+ .ropeproject
119
+
120
+ # mkdocs documentation
121
+ /site
122
+
123
+ # mypy
124
+ .mypy_cache/
125
+ .dmypy.json
126
+ dmypy.json
127
+
128
+ # Pyre type checker
129
+ .pyre/
130
+
131
+ .vscode
132
+ dataset/dataset_TM_train_cb1_temp.py
133
+ train_gpt_cnn_temp.py
134
+ train_gpt_cnn_mask.py
135
+ start.sh
136
+ start_eval.sh
137
+ config.json
138
+ output_GPT_Final
139
+ output_vqfinal
140
+ output_transformer
141
+ glove
142
+ checkpoints
143
+ dataset/HumanML3D
144
+ dataset/KIT-ML
145
+ output
146
+ matrix_multi.py
147
+ body_models
148
+ render_final_diffuse.py
149
+ render_final_mdm.py
150
+ pretrained
151
+ MDM
152
+ Motiondiffusion
153
+ Visualize_temp.py
154
+ new.sh
155
+ T2M_render
156
+ render_final_t2m.py
157
+
158
+ pose
MuseV/MMCM/mmcm/t2p/GPT_eval_multi.py ADDED
@@ -0,0 +1,121 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""Evaluate a pretrained text-to-motion pipeline (VQ-VAE + GPT transformer)
on the evaluation split, repeating the run to report mean metrics and 95%
confidence intervals."""
import os
import torch
import numpy as np
from torch.utils.tensorboard import SummaryWriter
import json
import clip

import options.option_transformer as option_trans
import models.vqvae as vqvae
import utils.utils_model as utils_model
import utils.eval_trans as eval_trans
from dataset import dataset_TM_eval
import models.t2m_trans as trans
from options.get_eval_option import get_opt
from models.evaluator_wrapper import EvaluatorModelWrapper
import warnings
warnings.filterwarnings('ignore')

##### ---- Exp dirs ---- #####
args = option_trans.get_args_parser()
torch.manual_seed(args.seed)

args.out_dir = os.path.join(args.out_dir, f'{args.exp_name}')
os.makedirs(args.out_dir, exist_ok = True)

##### ---- Logger ---- #####
logger = utils_model.get_logger(args.out_dir)
writer = SummaryWriter(args.out_dir)
logger.info(json.dumps(vars(args), indent=4, sort_keys=True))

# Mid-file import kept as-is: moving it above could change side-effect order.
from utils.word_vectorizer import WordVectorizer
w_vectorizer = WordVectorizer('./glove', 'our_vab')
val_loader = dataset_TM_eval.DATALoader(args.dataname, True, 32, w_vectorizer)

# Evaluator options depend on the dataset ('kit' vs HumanML3D/'t2m').
dataset_opt_path = 'checkpoints/kit/Comp_v6_KLD005/opt.txt' if args.dataname == 'kit' else 'checkpoints/t2m/Comp_v6_KLD005/opt.txt'

wrapper_opt = get_opt(dataset_opt_path, torch.device('cuda'))
eval_wrapper = EvaluatorModelWrapper(wrapper_opt)

##### ---- Network ---- #####

## load clip model and datasets
clip_model, clip_preprocess = clip.load("ViT-B/32", device=torch.device('cuda'), jit=False)  # Must set jit=False for training
clip.model.convert_weights(clip_model)  # Actually this line is unnecessary since clip by default already on float16
clip_model.eval()
# Freeze CLIP: it is only used as a fixed text encoder during evaluation.
for p in clip_model.parameters():
    p.requires_grad = False

net = vqvae.HumanVQVAE(args, ## use args to define different parameters in different quantizers
                       args.nb_code,
                       args.code_dim,
                       args.output_emb_width,
                       args.down_t,
                       args.stride_t,
                       args.width,
                       args.depth,
                       args.dilation_growth_rate)


trans_encoder = trans.Text2Motion_Transformer(num_vq=args.nb_code,
                                              embed_dim=args.embed_dim_gpt,
                                              clip_dim=args.clip_dim,
                                              block_size=args.block_size,
                                              num_layers=args.num_layers,
                                              n_head=args.n_head_gpt,
                                              drop_out_rate=args.drop_out_rate,
                                              fc_rate=args.ff_rate)


print ('loading checkpoint from {}'.format(args.resume_pth))
ckpt = torch.load(args.resume_pth, map_location='cpu')
net.load_state_dict(ckpt['net'], strict=True)
net.eval()
net.cuda()

if args.resume_trans is not None:
    print ('loading transformer checkpoint from {}'.format(args.resume_trans))
    ckpt = torch.load(args.resume_trans, map_location='cpu')
    trans_encoder.load_state_dict(ckpt['trans'], strict=True)
# NOTE(review): .train() before evaluation looks suspicious — .eval() is the
# usual choice here (dropout stays active in train mode); confirm intended.
trans_encoder.train()
trans_encoder.cuda()


# Per-repeat metric accumulators; averaged and summarized below.
fid = []
div = []
top1 = []
top2 = []
top3 = []
matching = []
multi = []
repeat_time = 20


for i in range(repeat_time):
    # savenpy only on the first repeat so predictions are dumped once.
    best_fid, best_iter, best_div, best_top1, best_top2, best_top3, best_matching, best_multi, writer, logger = eval_trans.evaluation_transformer_test(args.out_dir, val_loader, net, trans_encoder, logger, writer, 0, best_fid=1000, best_iter=0, best_div=100, best_top1=0, best_top2=0, best_top3=0, best_matching=100, best_multi=0, clip_model=clip_model, eval_wrapper=eval_wrapper, draw=False, savegif=False, save=False, savenpy=(i==0))
    fid.append(best_fid)
    div.append(best_div)
    top1.append(best_top1)
    top2.append(best_top2)
    top3.append(best_top3)
    matching.append(best_matching)
    multi.append(best_multi)

print('final result:')
print('fid: ', sum(fid)/repeat_time)
print('div: ', sum(div)/repeat_time)
print('top1: ', sum(top1)/repeat_time)
print('top2: ', sum(top2)/repeat_time)
print('top3: ', sum(top3)/repeat_time)
print('matching: ', sum(matching)/repeat_time)
print('multi: ', sum(multi)/repeat_time)

# 1.96 * std / sqrt(n) is the half-width of the 95% confidence interval.
fid = np.array(fid)
div = np.array(div)
top1 = np.array(top1)
top2 = np.array(top2)
top3 = np.array(top3)
matching = np.array(matching)
multi = np.array(multi)
msg_final = f"FID. {np.mean(fid):.3f}, conf. {np.std(fid)*1.96/np.sqrt(repeat_time):.3f}, Diversity. {np.mean(div):.3f}, conf. {np.std(div)*1.96/np.sqrt(repeat_time):.3f}, TOP1. {np.mean(top1):.3f}, conf. {np.std(top1)*1.96/np.sqrt(repeat_time):.3f}, TOP2. {np.mean(top2):.3f}, conf. {np.std(top2)*1.96/np.sqrt(repeat_time):.3f}, TOP3. {np.mean(top3):.3f}, conf. {np.std(top3)*1.96/np.sqrt(repeat_time):.3f}, Matching. {np.mean(matching):.3f}, conf. {np.std(matching)*1.96/np.sqrt(repeat_time):.3f}, Multi. {np.mean(multi):.3f}, conf. {np.std(multi)*1.96/np.sqrt(repeat_time):.3f}"
logger.info(msg_final)
MuseV/MMCM/mmcm/t2p/LICENSE ADDED
@@ -0,0 +1,201 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Apache License
2
+ Version 2.0, January 2004
3
+ http://www.apache.org/licenses/
4
+
5
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6
+
7
+ 1. Definitions.
8
+
9
+ "License" shall mean the terms and conditions for use, reproduction,
10
+ and distribution as defined by Sections 1 through 9 of this document.
11
+
12
+ "Licensor" shall mean the copyright owner or entity authorized by
13
+ the copyright owner that is granting the License.
14
+
15
+ "Legal Entity" shall mean the union of the acting entity and all
16
+ other entities that control, are controlled by, or are under common
17
+ control with that entity. For the purposes of this definition,
18
+ "control" means (i) the power, direct or indirect, to cause the
19
+ direction or management of such entity, whether by contract or
20
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
21
+ outstanding shares, or (iii) beneficial ownership of such entity.
22
+
23
+ "You" (or "Your") shall mean an individual or Legal Entity
24
+ exercising permissions granted by this License.
25
+
26
+ "Source" form shall mean the preferred form for making modifications,
27
+ including but not limited to software source code, documentation
28
+ source, and configuration files.
29
+
30
+ "Object" form shall mean any form resulting from mechanical
31
+ transformation or translation of a Source form, including but
32
+ not limited to compiled object code, generated documentation,
33
+ and conversions to other media types.
34
+
35
+ "Work" shall mean the work of authorship, whether in Source or
36
+ Object form, made available under the License, as indicated by a
37
+ copyright notice that is included in or attached to the work
38
+ (an example is provided in the Appendix below).
39
+
40
+ "Derivative Works" shall mean any work, whether in Source or Object
41
+ form, that is based on (or derived from) the Work and for which the
42
+ editorial revisions, annotations, elaborations, or other modifications
43
+ represent, as a whole, an original work of authorship. For the purposes
44
+ of this License, Derivative Works shall not include works that remain
45
+ separable from, or merely link (or bind by name) to the interfaces of,
46
+ the Work and Derivative Works thereof.
47
+
48
+ "Contribution" shall mean any work of authorship, including
49
+ the original version of the Work and any modifications or additions
50
+ to that Work or Derivative Works thereof, that is intentionally
51
+ submitted to Licensor for inclusion in the Work by the copyright owner
52
+ or by an individual or Legal Entity authorized to submit on behalf of
53
+ the copyright owner. For the purposes of this definition, "submitted"
54
+ means any form of electronic, verbal, or written communication sent
55
+ to the Licensor or its representatives, including but not limited to
56
+ communication on electronic mailing lists, source code control systems,
57
+ and issue tracking systems that are managed by, or on behalf of, the
58
+ Licensor for the purpose of discussing and improving the Work, but
59
+ excluding communication that is conspicuously marked or otherwise
60
+ designated in writing by the copyright owner as "Not a Contribution."
61
+
62
+ "Contributor" shall mean Licensor and any individual or Legal Entity
63
+ on behalf of whom a Contribution has been received by Licensor and
64
+ subsequently incorporated within the Work.
65
+
66
+ 2. Grant of Copyright License. Subject to the terms and conditions of
67
+ this License, each Contributor hereby grants to You a perpetual,
68
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
69
+ copyright license to reproduce, prepare Derivative Works of,
70
+ publicly display, publicly perform, sublicense, and distribute the
71
+ Work and such Derivative Works in Source or Object form.
72
+
73
+ 3. Grant of Patent License. Subject to the terms and conditions of
74
+ this License, each Contributor hereby grants to You a perpetual,
75
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
76
+ (except as stated in this section) patent license to make, have made,
77
+ use, offer to sell, sell, import, and otherwise transfer the Work,
78
+ where such license applies only to those patent claims licensable
79
+ by such Contributor that are necessarily infringed by their
80
+ Contribution(s) alone or by combination of their Contribution(s)
81
+ with the Work to which such Contribution(s) was submitted. If You
82
+ institute patent litigation against any entity (including a
83
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
84
+ or a Contribution incorporated within the Work constitutes direct
85
+ or contributory patent infringement, then any patent licenses
86
+ granted to You under this License for that Work shall terminate
87
+ as of the date such litigation is filed.
88
+
89
+ 4. Redistribution. You may reproduce and distribute copies of the
90
+ Work or Derivative Works thereof in any medium, with or without
91
+ modifications, and in Source or Object form, provided that You
92
+ meet the following conditions:
93
+
94
+ (a) You must give any other recipients of the Work or
95
+ Derivative Works a copy of this License; and
96
+
97
+ (b) You must cause any modified files to carry prominent notices
98
+ stating that You changed the files; and
99
+
100
+ (c) You must retain, in the Source form of any Derivative Works
101
+ that You distribute, all copyright, patent, trademark, and
102
+ attribution notices from the Source form of the Work,
103
+ excluding those notices that do not pertain to any part of
104
+ the Derivative Works; and
105
+
106
+ (d) If the Work includes a "NOTICE" text file as part of its
107
+ distribution, then any Derivative Works that You distribute must
108
+ include a readable copy of the attribution notices contained
109
+ within such NOTICE file, excluding those notices that do not
110
+ pertain to any part of the Derivative Works, in at least one
111
+ of the following places: within a NOTICE text file distributed
112
+ as part of the Derivative Works; within the Source form or
113
+ documentation, if provided along with the Derivative Works; or,
114
+ within a display generated by the Derivative Works, if and
115
+ wherever such third-party notices normally appear. The contents
116
+ of the NOTICE file are for informational purposes only and
117
+ do not modify the License. You may add Your own attribution
118
+ notices within Derivative Works that You distribute, alongside
119
+ or as an addendum to the NOTICE text from the Work, provided
120
+ that such additional attribution notices cannot be construed
121
+ as modifying the License.
122
+
123
+ You may add Your own copyright statement to Your modifications and
124
+ may provide additional or different license terms and conditions
125
+ for use, reproduction, or distribution of Your modifications, or
126
+ for any such Derivative Works as a whole, provided Your use,
127
+ reproduction, and distribution of the Work otherwise complies with
128
+ the conditions stated in this License.
129
+
130
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
131
+ any Contribution intentionally submitted for inclusion in the Work
132
+ by You to the Licensor shall be under the terms and conditions of
133
+ this License, without any additional terms or conditions.
134
+ Notwithstanding the above, nothing herein shall supersede or modify
135
+ the terms of any separate license agreement you may have executed
136
+ with Licensor regarding such Contributions.
137
+
138
+ 6. Trademarks. This License does not grant permission to use the trade
139
+ names, trademarks, service marks, or product names of the Licensor,
140
+ except as required for reasonable and customary use in describing the
141
+ origin of the Work and reproducing the content of the NOTICE file.
142
+
143
+ 7. Disclaimer of Warranty. Unless required by applicable law or
144
+ agreed to in writing, Licensor provides the Work (and each
145
+ Contributor provides its Contributions) on an "AS IS" BASIS,
146
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147
+ implied, including, without limitation, any warranties or conditions
148
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149
+ PARTICULAR PURPOSE. You are solely responsible for determining the
150
+ appropriateness of using or redistributing the Work and assume any
151
+ risks associated with Your exercise of permissions under this License.
152
+
153
+ 8. Limitation of Liability. In no event and under no legal theory,
154
+ whether in tort (including negligence), contract, or otherwise,
155
+ unless required by applicable law (such as deliberate and grossly
156
+ negligent acts) or agreed to in writing, shall any Contributor be
157
+ liable to You for damages, including any direct, indirect, special,
158
+ incidental, or consequential damages of any character arising as a
159
+ result of this License or out of the use or inability to use the
160
+ Work (including but not limited to damages for loss of goodwill,
161
+ work stoppage, computer failure or malfunction, or any and all
162
+ other commercial damages or losses), even if such Contributor
163
+ has been advised of the possibility of such damages.
164
+
165
+ 9. Accepting Warranty or Additional Liability. While redistributing
166
+ the Work or Derivative Works thereof, You may choose to offer,
167
+ and charge a fee for, acceptance of support, warranty, indemnity,
168
+ or other liability obligations and/or rights consistent with this
169
+ License. However, in accepting such obligations, You may act only
170
+ on Your own behalf and on Your sole responsibility, not on behalf
171
+ of any other Contributor, and only if You agree to indemnify,
172
+ defend, and hold each Contributor harmless for any liability
173
+ incurred by, or claims asserted against, such Contributor by reason
174
+ of your accepting any such warranty or additional liability.
175
+
176
+ END OF TERMS AND CONDITIONS
177
+
178
+ APPENDIX: How to apply the Apache License to your work.
179
+
180
+ To apply the Apache License to your work, attach the following
181
+ boilerplate notice, with the fields enclosed by brackets "[]"
182
+ replaced with your own identifying information. (Don't include
183
+ the brackets!) The text should be enclosed in the appropriate
184
+ comment syntax for the file format. We also recommend that a
185
+ file or class name and description of purpose be included on the
186
+ same "printed page" as the copyright notice for easier
187
+ identification within third-party archives.
188
+
189
+ Copyright 2023 tencent
190
+
191
+ Licensed under the Apache License, Version 2.0 (the "License");
192
+ you may not use this file except in compliance with the License.
193
+ You may obtain a copy of the License at
194
+
195
+ http://www.apache.org/licenses/LICENSE-2.0
196
+
197
+ Unless required by applicable law or agreed to in writing, software
198
+ distributed under the License is distributed on an "AS IS" BASIS,
199
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200
+ See the License for the specific language governing permissions and
201
+ limitations under the License.