New pyannote-v3-pro (#1)
- New pyannote-v3-pro (8c02f5e91862b3bf8a4e23ca79c301196423d673)
- New pyannote-v3-pro SpeakerSegmenter (5ea9592772d00d547da99ae3f7e5c958c78deb9f)
- Update speaker_embedder/pyannote-v3-pro/LICENSE_NOTICE.txt (c5a4c6722d64a288af1864a70f91a2930cf1aa08)
- Update speaker_embedder/pyannote-v3-pro/W16A16/LICENSE_NOTICE.txt (374499d2f345de69a51a7c2567e0e462514ff7b8)
- Update speaker_embedder/pyannote-v3-pro/W8A16/LICENSE_NOTICE.txt (f1960989a5088756b8cbfdde1fa9c75bd7c1adcd)
- Update speaker_segmenter/pyannote-v3-pro/LICENSE_NOTICE.txt (f6b4119078515307b830d24254f50a766df950fd)
- Update speaker_segmenter/pyannote-v3-pro/W32A32/LICENSE_NOTICE.txt (b6731a299fd0f81f5617aaf6f246d62722ddafa2)
- Update speaker_segmenter/pyannote-v3-pro/W8A16/LICENSE_NOTICE.txt (d82d6c440f605ad1dcb58d7925aac3a233118131)
- Delete speaker_embedder/pyannote-v3-pro/W16A16/SpeakerEmbeddingPreprocessor.mlmodelc (c79f308c1ca50c9e24487d0c18f43820c06d2b1b)
- Delete speaker_embedder/pyannote-v3-pro/W8A16/SpeakerEmbeddingPreprocessor.mlmodelc (9b92fca14e181a18c573d15e92f3c15ee45ddb3a)
- pyannote-v3-pro W16A16 SpeakerEmbedderPreprocessor (acdfedd1ec4e0a8802af60c32a3e41dd115e0494)
- pyannote-v3-pro W8A16 SpeakerEmbedderPreprocessor (29c808ac698e3a6c48f752a05162321f1883f57e)
- speaker_embedder/pyannote-v3-pro/LICENSE_NOTICE.txt +7 -0
- speaker_embedder/pyannote-v3-pro/README.txt +6 -0
- speaker_embedder/pyannote-v3-pro/W16A16/LICENSE_NOTICE.txt +7 -0
- speaker_embedder/pyannote-v3-pro/W16A16/README.txt +6 -0
- speaker_embedder/pyannote-v3-pro/W16A16/SpeakerEmbedder.mlmodelc/analytics/coremldata.bin +3 -0
- speaker_embedder/pyannote-v3-pro/W16A16/SpeakerEmbedder.mlmodelc/coremldata.bin +3 -0
- speaker_embedder/pyannote-v3-pro/W16A16/SpeakerEmbedder.mlmodelc/metadata.json +86 -0
- speaker_embedder/pyannote-v3-pro/W16A16/SpeakerEmbedder.mlmodelc/model.mil +0 -0
- speaker_embedder/pyannote-v3-pro/W16A16/SpeakerEmbedder.mlmodelc/weights/weight.bin +3 -0
- speaker_embedder/pyannote-v3-pro/W16A16/SpeakerEmbedderPreprocessor.mlmodelc/analytics/coremldata.bin +3 -0
- speaker_embedder/pyannote-v3-pro/W16A16/SpeakerEmbedderPreprocessor.mlmodelc/coremldata.bin +3 -0
- speaker_embedder/pyannote-v3-pro/W16A16/SpeakerEmbedderPreprocessor.mlmodelc/metadata.json +77 -0
- speaker_embedder/pyannote-v3-pro/W16A16/SpeakerEmbedderPreprocessor.mlmodelc/model.mil +90 -0
- speaker_embedder/pyannote-v3-pro/W16A16/SpeakerEmbedderPreprocessor.mlmodelc/weights/weight.bin +3 -0
- speaker_embedder/pyannote-v3-pro/W8A16/LICENSE_NOTICE.txt +7 -0
- speaker_embedder/pyannote-v3-pro/W8A16/README.txt +6 -0
- speaker_embedder/pyannote-v3-pro/W8A16/SpeakerEmbedder.mlmodelc/analytics/coremldata.bin +3 -0
- speaker_embedder/pyannote-v3-pro/W8A16/SpeakerEmbedder.mlmodelc/coremldata.bin +3 -0
- speaker_embedder/pyannote-v3-pro/W8A16/SpeakerEmbedder.mlmodelc/metadata.json +87 -0
- speaker_embedder/pyannote-v3-pro/W8A16/SpeakerEmbedder.mlmodelc/model.mil +0 -0
- speaker_embedder/pyannote-v3-pro/W8A16/SpeakerEmbedder.mlmodelc/weights/weight.bin +3 -0
- speaker_embedder/pyannote-v3-pro/W8A16/SpeakerEmbedderPreprocessor.mlmodelc/analytics/coremldata.bin +3 -0
- speaker_embedder/pyannote-v3-pro/W8A16/SpeakerEmbedderPreprocessor.mlmodelc/coremldata.bin +3 -0
- speaker_embedder/pyannote-v3-pro/W8A16/SpeakerEmbedderPreprocessor.mlmodelc/metadata.json +77 -0
- speaker_embedder/pyannote-v3-pro/W8A16/SpeakerEmbedderPreprocessor.mlmodelc/model.mil +90 -0
- speaker_embedder/pyannote-v3-pro/W8A16/SpeakerEmbedderPreprocessor.mlmodelc/weights/weight.bin +3 -0
- speaker_segmenter/pyannote-v3-pro/LICENSE_NOTICE.txt +7 -0
- speaker_segmenter/pyannote-v3-pro/README.txt +6 -0
- speaker_segmenter/pyannote-v3-pro/W32A32/LICENSE_NOTICE.txt +7 -0
- speaker_segmenter/pyannote-v3-pro/W32A32/README.txt +6 -0
- speaker_segmenter/pyannote-v3-pro/W32A32/SpeakerSegmenter.mlmodelc/analytics/coremldata.bin +3 -0
- speaker_segmenter/pyannote-v3-pro/W32A32/SpeakerSegmenter.mlmodelc/coremldata.bin +3 -0
- speaker_segmenter/pyannote-v3-pro/W32A32/SpeakerSegmenter.mlmodelc/metadata.json +144 -0
- speaker_segmenter/pyannote-v3-pro/W32A32/SpeakerSegmenter.mlmodelc/model.mil +0 -0
- speaker_segmenter/pyannote-v3-pro/W32A32/SpeakerSegmenter.mlmodelc/weights/weight.bin +3 -0
- speaker_segmenter/pyannote-v3-pro/W8A16/LICENSE_NOTICE.txt +7 -0
- speaker_segmenter/pyannote-v3-pro/W8A16/README.txt +6 -0
- speaker_segmenter/pyannote-v3-pro/W8A16/SpeakerSegmenter.mlmodelc/analytics/coremldata.bin +3 -0
- speaker_segmenter/pyannote-v3-pro/W8A16/SpeakerSegmenter.mlmodelc/coremldata.bin +3 -0
- speaker_segmenter/pyannote-v3-pro/W8A16/SpeakerSegmenter.mlmodelc/metadata.json +145 -0
- speaker_segmenter/pyannote-v3-pro/W8A16/SpeakerSegmenter.mlmodelc/model.mil +0 -0
- speaker_segmenter/pyannote-v3-pro/W8A16/SpeakerSegmenter.mlmodelc/weights/weight.bin +3 -0
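
The W16A16, W8A16, and W32A32 folders hold the same models at different storage precisions (per the metadata below: float16, 8-bit palettized weights, and float32, respectively). As a minimal sketch of fetching one variant and loading it — assuming access to the gated argmaxinc/speakerkit-pro repo, macOS, coremltools ≥ 6, and huggingface_hub; none of this is SpeakerKit's official API:

```python
# Minimal sketch: fetch one precision variant of the embedder and load the
# compiled Core ML model. Assumes access to the gated repo, macOS, and
# coremltools >= 6 (which provides CompiledMLModel for .mlmodelc bundles).
from huggingface_hub import snapshot_download
import coremltools as ct

local_dir = snapshot_download(
    repo_id="argmaxinc/speakerkit-pro",
    allow_patterns=["speaker_embedder/pyannote-v3-pro/W16A16/*"],
)
embedder = ct.models.CompiledMLModel(
    f"{local_dir}/speaker_embedder/pyannote-v3-pro/W16A16/SpeakerEmbedder.mlmodelc"
)
```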
@@ -0,0 +1,7 @@
+Argmax proprietary and confidential. Under NDA.
+
+Copyright 2025 Argmax, Inc. All rights reserved.
+
+Unauthorized access, copying, use, distribution, and/or commercialization of this file, via any medium or means, is strictly prohibited.
+
+Please contact Argmax for licensing information at [email protected].
@@ -0,0 +1,6 @@
+# License
+
+Original model weights: https://github.com/wenet-e2e/wespeaker/blob/master/docs/pretrained.md#model-license
+Argmax-optimized model assets (assets with the `.mlmodelc` extension): https://huggingface.co/argmaxinc/speakerkit-pro/blob/main/LICENSE_NOTICE.txt
+
+Please contact [email protected] for licensing SpeakerKit Pro assets.
@@ -0,0 +1,7 @@
+Argmax proprietary and confidential. Under NDA.
+
+Copyright 2025 Argmax, Inc. All rights reserved.
+
+Unauthorized access, copying, use, distribution, and/or commercialization of this file, via any medium or means, is strictly prohibited.
+
+Please contact Argmax for licensing information at [email protected].
@@ -0,0 +1,6 @@
+# License
+
+Original model weights: https://github.com/wenet-e2e/wespeaker/blob/master/docs/pretrained.md#model-license
+Argmax-optimized model assets (assets with the `.mlmodelc` extension): https://huggingface.co/argmaxinc/speakerkit-pro/blob/main/LICENSE_NOTICE.txt
+
+Please contact [email protected] for licensing SpeakerKit Pro assets.
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31ebd86f9f3a87ee2bb1aa32722968e3f6821d6b393171bb6b92683213f173e1
+size 243
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1bbb69d66cdb7f69a6c9d4dc1ff9b114d6097bb69581e12a706cf16e27ce10be
+size 370
@@ -0,0 +1,86 @@
+[
+  {
+    "metadataOutputVersion" : "3.0",
+    "storagePrecision" : "Float16",
+    "outputSchema" : [
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float16",
+        "formattedType" : "MultiArray (Float16 1 × 64 × 256)",
+        "shortDescription" : "",
+        "shape" : "[1, 64, 256]",
+        "name" : "speaker_embeddings",
+        "type" : "MultiArray"
+      }
+    ],
+    "modelParameters" : [
+
+    ],
+    "specificationVersion" : 7,
+    "mlProgramOperationTypeHistogram" : {
+      "Concat" : 64,
+      "Ios16.mul" : 256,
+      "SliceByIndex" : 64,
+      "Transpose" : 1,
+      "Ios16.sub" : 128,
+      "Ios16.sqrt" : 64,
+      "Stack" : 1,
+      "UpsampleNearestNeighbor" : 1,
+      "Ios16.conv" : 36,
+      "Ios16.add" : 144,
+      "Squeeze" : 1,
+      "Ios16.relu" : 33,
+      "Ios16.realDiv" : 192,
+      "Ios16.reduceSum" : 256,
+      "ExpandDims" : 130,
+      "Ios16.linear" : 1,
+      "Ios16.reshape" : 1
+    },
+    "computePrecision" : "Mixed (Float16, Float32, Int32)",
+    "isUpdatable" : "0",
+    "stateSchema" : [
+
+    ],
+    "availability" : {
+      "macOS" : "13.0",
+      "tvOS" : "16.0",
+      "visionOS" : "1.0",
+      "watchOS" : "9.0",
+      "iOS" : "16.0",
+      "macCatalyst" : "16.0"
+    },
+    "modelType" : {
+      "name" : "MLModelType_mlProgram"
+    },
+    "userDefinedMetadata" : {
+      "com.github.apple.coremltools.source_dialect" : "TorchScript",
+      "com.github.apple.coremltools.version" : "8.2",
+      "com.github.apple.coremltools.source" : "torch==2.6.0"
+    },
+    "inputSchema" : [
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float16",
+        "formattedType" : "MultiArray (Float16 1 × 998 × 80)",
+        "shortDescription" : "",
+        "shape" : "[1, 998, 80]",
+        "name" : "preprocessor_output_1",
+        "type" : "MultiArray"
+      },
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float16",
+        "formattedType" : "MultiArray (Float16 1 × 64 × 589)",
+        "shortDescription" : "",
+        "shape" : "[1, 64, 589]",
+        "name" : "speaker_masks",
+        "type" : "MultiArray"
+      }
+    ],
+    "generatedClassName" : "SpeakerEmbedding",
+    "method" : "predict"
+  }
+]
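
This schema pins down how the embedder is driven: 80-dim log-mel features for a 10 s window (998 frames) plus up to 64 per-speaker masks at a 589-frame resolution, yielding one 256-dim embedding per mask slot. A sketch of chaining the preprocessor into the embedder under those shapes — how the preprocessor's 2998-frame (30 s) output gets windowed down to the 998 frames the embedder expects is an assumption here (a single 10 s slice), not something this diff documents:

```python
# Sketch: preprocessor -> embedder, with input/output names and shapes taken
# from the two metadata.json schemas in this commit; `local_dir` comes from
# the earlier download snippet. The 998-frame slice and the all-ones masks
# are placeholders, not the SpeakerKit pipeline's real windowing/masking.
import numpy as np
import coremltools as ct

base = f"{local_dir}/speaker_embedder/pyannote-v3-pro/W16A16"
pre = ct.models.CompiledMLModel(f"{base}/SpeakerEmbedderPreprocessor.mlmodelc")
emb = ct.models.CompiledMLModel(f"{base}/SpeakerEmbedder.mlmodelc")

audio = np.zeros((1, 480000), dtype=np.float16)   # 30 s of 16 kHz audio
feats = pre.predict({"waveforms": audio})["preprocessor_output_1"]  # (1, 2998, 80)
window = feats[:, :998, :]                         # assumed single 10 s slice
masks = np.ones((1, 64, 589), dtype=np.float16)    # placeholder speaker masks
out = emb.predict({"preprocessor_output_1": window, "speaker_masks": masks})
embeddings = out["speaker_embeddings"]             # (1, 64, 256)
```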
The diff for this file is too large to render; see the raw diff.
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6dba18a57a81b1e872802ca4def29541bb7900ccff430d9b2040092cadd7d688
+size 13264960
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1383750df1af99af002f0c3403bcf19a18c3d749706eb3498d34b0fe01abf2fc
+size 243
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:dc230803421bf4fe14f843ea5b5fa0035487fd19cdd69ed670d72560b6a44586
+size 330
@@ -0,0 +1,77 @@
+[
+  {
+    "metadataOutputVersion" : "3.0",
+    "storagePrecision" : "Float32",
+    "outputSchema" : [
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float16",
+        "formattedType" : "MultiArray (Float16 1 × 2998 × 80)",
+        "shortDescription" : "",
+        "shape" : "[1, 2998, 80]",
+        "name" : "preprocessor_output_1",
+        "type" : "MultiArray"
+      }
+    ],
+    "modelParameters" : [
+
+    ],
+    "specificationVersion" : 7,
+    "mlProgramOperationTypeHistogram" : {
+      "Ios16.cast" : 2,
+      "Ios16.mul" : 4,
+      "SliceByIndex" : 2,
+      "Transpose" : 2,
+      "SlidingWindows" : 1,
+      "Ios16.sub" : 3,
+      "Ios16.log" : 1,
+      "Ios16.reduceMean" : 2,
+      "Ios16.square" : 2,
+      "Squeeze" : 2,
+      "Ios16.matmul" : 2,
+      "Ios16.add" : 1,
+      "Ios16.linear" : 1,
+      "ExpandDims" : 4,
+      "Ios16.gather" : 2,
+      "Ios16.maximum" : 1,
+      "Identity" : 1,
+      "Pad" : 2
+    },
+    "computePrecision" : "Mixed (Float16, Float32, Int32)",
+    "isUpdatable" : "0",
+    "stateSchema" : [
+
+    ],
+    "availability" : {
+      "macOS" : "13.0",
+      "tvOS" : "16.0",
+      "visionOS" : "1.0",
+      "watchOS" : "9.0",
+      "iOS" : "16.0",
+      "macCatalyst" : "16.0"
+    },
+    "modelType" : {
+      "name" : "MLModelType_mlProgram"
+    },
+    "userDefinedMetadata" : {
+      "com.github.apple.coremltools.source_dialect" : "TorchScript",
+      "com.github.apple.coremltools.source" : "torch==2.6.0",
+      "com.github.apple.coremltools.version" : "8.2"
+    },
+    "inputSchema" : [
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float16",
+        "formattedType" : "MultiArray (Float16 1 × 480000)",
+        "shortDescription" : "",
+        "shape" : "[1, 480000]",
+        "name" : "waveforms",
+        "type" : "MultiArray"
+      }
+    ],
+    "generatedClassName" : "SpeakerEmbeddingPreprocessor",
+    "method" : "predict"
+  }
+]
@@ -0,0 +1,90 @@
+program(1.0)
+[buildInfo = dict<tensor<string, []>, tensor<string, []>>({{"coremlc-component-MIL", "3402.3.2"}, {"coremlc-version", "3402.4.1"}, {"coremltools-component-torch", "2.6.0"}, {"coremltools-source-dialect", "TorchScript"}, {"coremltools-version", "8.2"}})]
+{
+  func main<ios16>(tensor<fp16, [1, 480000]> waveforms) {
+    tensor<string, []> cast_0_dtype_0 = const()[name = tensor<string, []>("cast_0_dtype_0"), val = tensor<string, []>("fp32")];
+    tensor<fp32, []> var_2_promoted = const()[name = tensor<string, []>("op_2_promoted"), val = tensor<fp32, []>(0x1p+15)];
+    tensor<fp32, [1, 480000]> cast_0 = cast(dtype = cast_0_dtype_0, x = waveforms)[name = tensor<string, []>("cast_11")];
+    tensor<fp32, [1, 480000]> waveform_1 = mul(x = cast_0, y = var_2_promoted)[name = tensor<string, []>("waveform_1")];
+    tensor<int32, [2]> var_6_begin_0 = const()[name = tensor<string, []>("op_6_begin_0"), val = tensor<int32, [2]>([0, 0])];
+    tensor<int32, [2]> var_6_end_0 = const()[name = tensor<string, []>("op_6_end_0"), val = tensor<int32, [2]>([1, 480000])];
+    tensor<bool, [2]> var_6_end_mask_0 = const()[name = tensor<string, []>("op_6_end_mask_0"), val = tensor<bool, [2]>([false, true])];
+    tensor<bool, [2]> var_6_squeeze_mask_0 = const()[name = tensor<string, []>("op_6_squeeze_mask_0"), val = tensor<bool, [2]>([true, false])];
+    tensor<fp32, [480000]> var_6 = slice_by_index(begin = var_6_begin_0, end = var_6_end_0, end_mask = var_6_end_mask_0, squeeze_mask = var_6_squeeze_mask_0, x = waveform_1)[name = tensor<string, []>("op_6")];
+    tensor<int32, []> sliding_windows_0_axis_0 = const()[name = tensor<string, []>("sliding_windows_0_axis_0"), val = tensor<int32, []>(0)];
+    tensor<int32, []> sliding_windows_0_size_0 = const()[name = tensor<string, []>("sliding_windows_0_size_0"), val = tensor<int32, []>(400)];
+    tensor<int32, []> sliding_windows_0_stride_0 = const()[name = tensor<string, []>("sliding_windows_0_stride_0"), val = tensor<int32, []>(160)];
+    tensor<fp32, [2998, 400]> sliding_windows_0 = sliding_windows(axis = sliding_windows_0_axis_0, size = sliding_windows_0_size_0, stride = sliding_windows_0_stride_0, x = var_6)[name = tensor<string, []>("sliding_windows_0")];
+    tensor<int32, [1]> var_42_axes_0 = const()[name = tensor<string, []>("op_42_axes_0"), val = tensor<int32, [1]>([1])];
+    tensor<bool, []> var_42_keep_dims_0 = const()[name = tensor<string, []>("op_42_keep_dims_0"), val = tensor<bool, []>(false)];
+    tensor<fp32, [2998]> var_42 = reduce_mean(axes = var_42_axes_0, keep_dims = var_42_keep_dims_0, x = sliding_windows_0)[name = tensor<string, []>("op_42")];
+    tensor<int32, [1]> row_means_axes_0 = const()[name = tensor<string, []>("row_means_axes_0"), val = tensor<int32, [1]>([1])];
+    tensor<fp32, [2998, 1]> row_means = expand_dims(axes = row_means_axes_0, x = var_42)[name = tensor<string, []>("row_means")];
+    tensor<fp32, [2998, 400]> strided_input_3 = sub(x = sliding_windows_0, y = row_means)[name = tensor<string, []>("strided_input_3")];
+    tensor<int32, [1]> input_1_axes_0 = const()[name = tensor<string, []>("input_1_axes_0"), val = tensor<int32, [1]>([0])];
+    tensor<fp32, [1, 2998, 400]> input_1 = expand_dims(axes = input_1_axes_0, x = strided_input_3)[name = tensor<string, []>("input_1")];
+    tensor<fp32, []> const_2 = const()[name = tensor<string, []>("const_2"), val = tensor<fp32, []>(0x0p+0)];
+    tensor<int32, [6]> var_54_pad_0 = const()[name = tensor<string, []>("op_54_pad_0"), val = tensor<int32, [6]>([0, 0, 0, 0, 1, 0])];
+    tensor<string, []> var_54_mode_0 = const()[name = tensor<string, []>("op_54_mode_0"), val = tensor<string, []>("replicate")];
+    tensor<fp32, [1, 2998, 401]> var_54 = pad(constant_val = const_2, mode = var_54_mode_0, pad = var_54_pad_0, x = input_1)[name = tensor<string, []>("op_54")];
+    tensor<int32, [1]> offset_strided_input_axes_0 = const()[name = tensor<string, []>("offset_strided_input_axes_0"), val = tensor<int32, [1]>([0])];
+    tensor<fp32, [2998, 401]> offset_strided_input = squeeze(axes = offset_strided_input_axes_0, x = var_54)[name = tensor<string, []>("offset_strided_input")];
+    tensor<int32, [2]> var_66_begin_0 = const()[name = tensor<string, []>("op_66_begin_0"), val = tensor<int32, [2]>([0, 0])];
+    tensor<int32, [2]> var_66_end_0 = const()[name = tensor<string, []>("op_66_end_0"), val = tensor<int32, [2]>([2998, 400])];
+    tensor<bool, [2]> var_66_end_mask_0 = const()[name = tensor<string, []>("op_66_end_mask_0"), val = tensor<bool, [2]>([true, false])];
+    tensor<fp32, [2998, 400]> var_66 = slice_by_index(begin = var_66_begin_0, end = var_66_end_0, end_mask = var_66_end_mask_0, x = offset_strided_input)[name = tensor<string, []>("op_66")];
+    tensor<fp32, []> var_67 = const()[name = tensor<string, []>("op_67"), val = tensor<fp32, []>(0x1.f0a3d8p-1)];
+    tensor<fp32, [2998, 400]> var_68 = mul(x = var_66, y = var_67)[name = tensor<string, []>("op_68")];
+    tensor<fp32, [2998, 400]> strided_input_5 = sub(x = strided_input_3, y = var_68)[name = tensor<string, []>("strided_input_5")];
+    tensor<fp32, [1, 400]> window_function = const()[name = tensor<string, []>("window_function"), val = tensor<fp32, [1, 400]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(64)))];
+    tensor<fp32, [2998, 400]> strided_input_7 = mul(x = strided_input_5, y = window_function)[name = tensor<string, []>("strided_input_7")];
+    tensor<int32, [1]> input_3_axes_0 = const()[name = tensor<string, []>("input_3_axes_0"), val = tensor<int32, [1]>([0])];
+    tensor<fp32, [1, 2998, 400]> input_3 = expand_dims(axes = input_3_axes_0, x = strided_input_7)[name = tensor<string, []>("input_3")];
+    tensor<fp32, []> const_3 = const()[name = tensor<string, []>("const_3"), val = tensor<fp32, []>(0x0p+0)];
+    tensor<int32, [6]> var_90_pad_0 = const()[name = tensor<string, []>("op_90_pad_0"), val = tensor<int32, [6]>([0, 0, 0, 0, 0, 112])];
+    tensor<string, []> var_90_mode_0 = const()[name = tensor<string, []>("op_90_mode_0"), val = tensor<string, []>("constant")];
+    tensor<fp32, [1, 2998, 512]> var_90 = pad(constant_val = const_3, mode = var_90_mode_0, pad = var_90_pad_0, x = input_3)[name = tensor<string, []>("op_90")];
+    tensor<int32, [1]> strided_input_axes_0 = const()[name = tensor<string, []>("strided_input_axes_0"), val = tensor<int32, [1]>([0])];
+    tensor<fp32, [2998, 512]> strided_input = squeeze(axes = strided_input_axes_0, x = var_90)[name = tensor<string, []>("strided_input")];
+    tensor<fp32, [512, 512]> cos_0 = const()[name = tensor<string, []>("cos_0"), val = tensor<fp32, [512, 512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1728)))];
+    tensor<fp32, [512, 512]> sin_0 = const()[name = tensor<string, []>("sin_0"), val = tensor<fp32, [512, 512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1050368)))];
+    tensor<bool, []> matmul_1_transpose_x_1 = const()[name = tensor<string, []>("matmul_1_transpose_x_1"), val = tensor<bool, []>(false)];
+    tensor<bool, []> matmul_1_transpose_y_1 = const()[name = tensor<string, []>("matmul_1_transpose_y_1"), val = tensor<bool, []>(true)];
+    tensor<fp32, [512, 2998]> matmul_1 = matmul(transpose_x = matmul_1_transpose_x_1, transpose_y = matmul_1_transpose_y_1, x = cos_0, y = strided_input)[name = tensor<string, []>("matmul_1")];
+    tensor<bool, []> matmul_3_transpose_x_1 = const()[name = tensor<string, []>("matmul_3_transpose_x_1"), val = tensor<bool, []>(false)];
+    tensor<bool, []> matmul_3_transpose_y_1 = const()[name = tensor<string, []>("matmul_3_transpose_y_1"), val = tensor<bool, []>(true)];
+    tensor<fp32, [512, 2998]> matmul_3 = matmul(transpose_x = matmul_3_transpose_x_1, transpose_y = matmul_3_transpose_y_1, x = sin_0, y = strided_input)[name = tensor<string, []>("matmul_3")];
+    tensor<fp32, []> mul_1_y_0 = const()[name = tensor<string, []>("mul_1_y_0"), val = tensor<fp32, []>(-0x1p+0)];
+    tensor<fp32, [512, 2998]> mul_1 = mul(x = matmul_3, y = mul_1_y_0)[name = tensor<string, []>("mul_1")];
+    tensor<int32, [2]> transpose_3_perm_0 = const()[name = tensor<string, []>("transpose_3_perm_0"), val = tensor<int32, [2]>([-1, 0])];
+    tensor<int32, [2]> transpose_4_perm_0 = const()[name = tensor<string, []>("transpose_4_perm_0"), val = tensor<int32, [2]>([-1, 0])];
+    tensor<int32, [257]> range_1d_2 = const()[name = tensor<string, []>("range_1d_2"), val = tensor<int32, [257]>([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255, 256])];
+    tensor<int32, []> gather_0_axis_0 = const()[name = tensor<string, []>("gather_0_axis_0"), val = tensor<int32, []>(-1)];
+    tensor<int32, []> gather_0_batch_dims_0 = const()[name = tensor<string, []>("gather_0_batch_dims_0"), val = tensor<int32, []>(0)];
+    tensor<fp32, [2998, 512]> transpose_3 = transpose(perm = transpose_3_perm_0, x = matmul_1)[name = tensor<string, []>("transpose_6")];
+    tensor<fp32, [2998, 257]> gather_0 = gather(axis = gather_0_axis_0, batch_dims = gather_0_batch_dims_0, indices = range_1d_2, x = transpose_3)[name = tensor<string, []>("gather_0")];
+    tensor<int32, []> gather_1_axis_0 = const()[name = tensor<string, []>("gather_1_axis_0"), val = tensor<int32, []>(-1)];
+    tensor<int32, []> gather_1_batch_dims_0 = const()[name = tensor<string, []>("gather_1_batch_dims_0"), val = tensor<int32, []>(0)];
+    tensor<fp32, [2998, 512]> transpose_4 = transpose(perm = transpose_4_perm_0, x = mul_1)[name = tensor<string, []>("transpose_5")];
+    tensor<fp32, [2998, 257]> gather_1 = gather(axis = gather_1_axis_0, batch_dims = gather_1_batch_dims_0, indices = range_1d_2, x = transpose_4)[name = tensor<string, []>("gather_1")];
+    tensor<fp32, [2998, 257]> square_0 = square(x = gather_0)[name = tensor<string, []>("square_0")];
+    tensor<fp32, [2998, 257]> square_1 = square(x = gather_1)[name = tensor<string, []>("square_1")];
+    tensor<fp32, [2998, 257]> add_1 = add(x = square_0, y = square_1)[name = tensor<string, []>("add_1")];
+    tensor<fp32, [2998, 257]> spectrum = identity(x = add_1)[name = tensor<string, []>("spectrum")];
+    tensor<fp32, [80, 257]> mel_energies_3 = const()[name = tensor<string, []>("mel_energies_3"), val = tensor<fp32, [80, 257]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(2099008)))];
+    tensor<fp32, [80]> mel_energies_bias_0 = const()[name = tensor<string, []>("mel_energies_bias_0"), val = tensor<fp32, [80]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(2181312)))];
+    tensor<fp32, [2998, 80]> mel_energies = linear(bias = mel_energies_bias_0, weight = mel_energies_3, x = spectrum)[name = tensor<string, []>("mel_energies")];
+    tensor<fp32, []> const_10 = const()[name = tensor<string, []>("const_10"), val = tensor<fp32, []>(0x1p-23)];
+    tensor<fp32, [2998, 80]> var_186 = maximum(x = mel_energies, y = const_10)[name = tensor<string, []>("op_186")];
+    tensor<fp32, []> filter_banks_epsilon_0 = const()[name = tensor<string, []>("filter_banks_epsilon_0"), val = tensor<fp32, []>(0x1p-149)];
+    tensor<fp32, [2998, 80]> filter_banks = log(epsilon = filter_banks_epsilon_0, x = var_186)[name = tensor<string, []>("filter_banks")];
+    tensor<int32, [1]> var_192_axes_0 = const()[name = tensor<string, []>("op_192_axes_0"), val = tensor<int32, [1]>([0])];
+    tensor<bool, []> var_192_keep_dims_0 = const()[name = tensor<string, []>("op_192_keep_dims_0"), val = tensor<bool, []>(true)];
+    tensor<fp32, [1, 80]> var_192 = reduce_mean(axes = var_192_axes_0, keep_dims = var_192_keep_dims_0, x = filter_banks)[name = tensor<string, []>("op_192")];
+    tensor<fp32, [2998, 80]> var_194 = sub(x = filter_banks, y = var_192)[name = tensor<string, []>("op_194")];
+    tensor<int32, [1]> obj_axes_0 = const()[name = tensor<string, []>("obj_axes_0"), val = tensor<int32, [1]>([0])];
+    tensor<fp32, [1, 2998, 80]> preprocessor_output_1_type_fp32 = expand_dims(axes = obj_axes_0, x = var_194)[name = tensor<string, []>("obj")];
+    tensor<string, []> cast_9_dtype_0 = const()[name = tensor<string, []>("cast_9_dtype_0"), val = tensor<string, []>("fp16")];
+    tensor<fp16, [1, 2998, 80]> preprocessor_output_1 = cast(dtype = cast_9_dtype_0, x = preprocessor_output_1_type_fp32)[name = tensor<string, []>("cast_10")];
+  } -> (preprocessor_output_1);
+}
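
Read end to end, this MIL program is a fixed Kaldi-style log-mel (fbank) front end: scale the fp16 waveform to int16 range (0x1p+15 = 32768), frame with a 400-sample window and 160-sample hop (2998 frames from 480000 samples), remove each frame's DC offset, apply pre-emphasis with coefficient 0x1.f0a3d8p-1 ≈ 0.97, multiply by a stored window, zero-pad to 512, take the power spectrum via cos/sin matmuls (257 bins), project through an 80-band mel filterbank, floor at 0x1p-23, take the log, and subtract the per-utterance mean. A rough NumPy equivalent of that dataflow — the real window and mel weights live in weights/weight.bin, so the Hann window and the `mel_weights`/`mel_bias` arguments below are stand-in assumptions:

```python
# Sketch of the DSP pipeline encoded in the MIL program above. Constants
# (frame 400 / hop 160 / FFT 512 / 80 mel bins / pre-emphasis 0.97 / 2**15
# scaling / per-utterance mean subtraction) are read off the program; the
# Hann window and mel_weights/mel_bias are placeholders for the blobs in
# weights/weight.bin.
import numpy as np

def log_mel(waveform, mel_weights, mel_bias):
    x = waveform.astype(np.float32) * 2.0**15              # int16 scaling
    n = 1 + (len(x) - 400) // 160                          # 2998 for 480000
    frames = np.stack([x[i*160 : i*160 + 400] for i in range(n)])
    frames -= frames.mean(axis=1, keepdims=True)           # remove DC offset
    prev = np.pad(frames, ((0, 0), (1, 0)), mode="edge")[:, :400]
    frames = frames - 0.97 * prev                          # pre-emphasis
    frames *= np.hanning(400)                              # stand-in window
    spec = np.abs(np.fft.rfft(frames, n=512)) ** 2         # 257-bin power
    mel = np.maximum(spec @ mel_weights.T + mel_bias, 2.0**-23)
    feats = np.log(mel)
    return feats - feats.mean(axis=0, keepdims=True)       # (n, 80), mean-normed
```

Expressing the 512-point DFT as two dense matmuls against precomputed cos/sin tables keeps the whole front end inside the mlprogram op set rather than relying on an FFT primitive.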
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5f2c284bd22f1f7ab76901c1c6e57f82d4ebbf057fa0b924aad057f124f77a89
+size 2181696
@@ -0,0 +1,7 @@
+Argmax proprietary and confidential. Under NDA.
+
+Copyright 2025 Argmax, Inc. All rights reserved.
+
+Unauthorized access, copying, use, distribution, and/or commercialization of this file, via any medium or means, is strictly prohibited.
+
+Please contact Argmax for licensing information at [email protected].
@@ -0,0 +1,6 @@
+# License
+
+Original model weights: https://github.com/wenet-e2e/wespeaker/blob/master/docs/pretrained.md#model-license
+Argmax-optimized model assets (assets with the `.mlmodelc` extension): https://huggingface.co/argmaxinc/speakerkit-pro/blob/main/LICENSE_NOTICE.txt
+
+Please contact [email protected] for licensing SpeakerKit Pro assets.
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:642e75621f11e461372ebf68c2861b4d56b48b89027295cfac05b518f88fc8f7
+size 243
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1bbb69d66cdb7f69a6c9d4dc1ff9b114d6097bb69581e12a706cf16e27ce10be
+size 370
@@ -0,0 +1,87 @@
+[
+  {
+    "metadataOutputVersion" : "3.0",
+    "storagePrecision" : "Mixed (Float16, Palettized (8 bits))",
+    "outputSchema" : [
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float16",
+        "formattedType" : "MultiArray (Float16 1 × 64 × 256)",
+        "shortDescription" : "",
+        "shape" : "[1, 64, 256]",
+        "name" : "speaker_embeddings",
+        "type" : "MultiArray"
+      }
+    ],
+    "modelParameters" : [
+
+    ],
+    "specificationVersion" : 7,
+    "mlProgramOperationTypeHistogram" : {
+      "Concat" : 64,
+      "Ios16.mul" : 256,
+      "SliceByIndex" : 64,
+      "Ios16.constexprLutToDense" : 35,
+      "Transpose" : 1,
+      "Ios16.sub" : 128,
+      "Ios16.sqrt" : 64,
+      "Stack" : 1,
+      "UpsampleNearestNeighbor" : 1,
+      "Ios16.conv" : 36,
+      "Ios16.add" : 144,
+      "Squeeze" : 1,
+      "Ios16.relu" : 33,
+      "Ios16.realDiv" : 192,
+      "Ios16.reduceSum" : 256,
+      "ExpandDims" : 130,
+      "Ios16.linear" : 1,
+      "Ios16.reshape" : 1
+    },
+    "computePrecision" : "Mixed (Float16, Float32, Int32)",
+    "isUpdatable" : "0",
+    "stateSchema" : [
+
+    ],
+    "availability" : {
+      "macOS" : "13.0",
+      "tvOS" : "16.0",
+      "visionOS" : "1.0",
+      "watchOS" : "9.0",
+      "iOS" : "16.0",
+      "macCatalyst" : "16.0"
+    },
+    "modelType" : {
+      "name" : "MLModelType_mlProgram"
+    },
+    "userDefinedMetadata" : {
+      "com.github.apple.coremltools.source_dialect" : "TorchScript",
+      "com.github.apple.coremltools.source" : "torch==2.6.0",
+      "com.github.apple.coremltools.version" : "8.2"
+    },
+    "inputSchema" : [
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float16",
+        "formattedType" : "MultiArray (Float16 1 × 998 × 80)",
+        "shortDescription" : "",
+        "shape" : "[1, 998, 80]",
+        "name" : "preprocessor_output_1",
+        "type" : "MultiArray"
+      },
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float16",
+        "formattedType" : "MultiArray (Float16 1 × 64 × 589)",
+        "shortDescription" : "",
+        "shape" : "[1, 64, 589]",
+        "name" : "speaker_masks",
+        "type" : "MultiArray"
+      }
+    ],
+    "generatedClassName" : "SpeakerEmbedding_8_bit",
+    "method" : "predict"
+  }
+]
The diff for this file is too large to render; see the raw diff.
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:14c666c28766dd780a3afc3dbb4b44078e418e16f44d97780b9f8555407213a1
+size 6661888
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1383750df1af99af002f0c3403bcf19a18c3d749706eb3498d34b0fe01abf2fc
+size 243
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:dc230803421bf4fe14f843ea5b5fa0035487fd19cdd69ed670d72560b6a44586
+size 330
@@ -0,0 +1,77 @@
+[
+  {
+    "metadataOutputVersion" : "3.0",
+    "storagePrecision" : "Float32",
+    "outputSchema" : [
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float16",
+        "formattedType" : "MultiArray (Float16 1 × 2998 × 80)",
+        "shortDescription" : "",
+        "shape" : "[1, 2998, 80]",
+        "name" : "preprocessor_output_1",
+        "type" : "MultiArray"
+      }
+    ],
+    "modelParameters" : [
+
+    ],
+    "specificationVersion" : 7,
+    "mlProgramOperationTypeHistogram" : {
+      "Ios16.cast" : 2,
+      "Ios16.mul" : 4,
+      "SliceByIndex" : 2,
+      "Transpose" : 2,
+      "SlidingWindows" : 1,
+      "Ios16.sub" : 3,
+      "Ios16.log" : 1,
+      "Ios16.reduceMean" : 2,
+      "Ios16.square" : 2,
+      "Squeeze" : 2,
+      "Ios16.matmul" : 2,
+      "Ios16.add" : 1,
+      "Ios16.linear" : 1,
+      "ExpandDims" : 4,
+      "Ios16.gather" : 2,
+      "Ios16.maximum" : 1,
+      "Identity" : 1,
+      "Pad" : 2
+    },
+    "computePrecision" : "Mixed (Float16, Float32, Int32)",
+    "isUpdatable" : "0",
+    "stateSchema" : [
+
+    ],
+    "availability" : {
+      "macOS" : "13.0",
+      "tvOS" : "16.0",
+      "visionOS" : "1.0",
+      "watchOS" : "9.0",
+      "iOS" : "16.0",
+      "macCatalyst" : "16.0"
+    },
+    "modelType" : {
+      "name" : "MLModelType_mlProgram"
+    },
+    "userDefinedMetadata" : {
+      "com.github.apple.coremltools.source_dialect" : "TorchScript",
+      "com.github.apple.coremltools.source" : "torch==2.6.0",
+      "com.github.apple.coremltools.version" : "8.2"
+    },
+    "inputSchema" : [
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float16",
+        "formattedType" : "MultiArray (Float16 1 × 480000)",
+        "shortDescription" : "",
+        "shape" : "[1, 480000]",
+        "name" : "waveforms",
+        "type" : "MultiArray"
+      }
+    ],
+    "generatedClassName" : "SpeakerEmbeddingPreprocessor",
+    "method" : "predict"
+  }
+]
@@ -0,0 +1,90 @@
+program(1.0)
+[buildInfo = dict<tensor<string, []>, tensor<string, []>>({{"coremlc-component-MIL", "3402.3.2"}, {"coremlc-version", "3402.4.1"}, {"coremltools-component-torch", "2.6.0"}, {"coremltools-source-dialect", "TorchScript"}, {"coremltools-version", "8.2"}})]
+{
+  func main<ios16>(tensor<fp16, [1, 480000]> waveforms) {
+    tensor<string, []> cast_0_dtype_0 = const()[name = tensor<string, []>("cast_0_dtype_0"), val = tensor<string, []>("fp32")];
+    tensor<fp32, []> var_2_promoted = const()[name = tensor<string, []>("op_2_promoted"), val = tensor<fp32, []>(0x1p+15)];
+    tensor<fp32, [1, 480000]> cast_0 = cast(dtype = cast_0_dtype_0, x = waveforms)[name = tensor<string, []>("cast_11")];
+    tensor<fp32, [1, 480000]> waveform_1 = mul(x = cast_0, y = var_2_promoted)[name = tensor<string, []>("waveform_1")];
+    tensor<int32, [2]> var_6_begin_0 = const()[name = tensor<string, []>("op_6_begin_0"), val = tensor<int32, [2]>([0, 0])];
+    tensor<int32, [2]> var_6_end_0 = const()[name = tensor<string, []>("op_6_end_0"), val = tensor<int32, [2]>([1, 480000])];
+    tensor<bool, [2]> var_6_end_mask_0 = const()[name = tensor<string, []>("op_6_end_mask_0"), val = tensor<bool, [2]>([false, true])];
+    tensor<bool, [2]> var_6_squeeze_mask_0 = const()[name = tensor<string, []>("op_6_squeeze_mask_0"), val = tensor<bool, [2]>([true, false])];
+    tensor<fp32, [480000]> var_6 = slice_by_index(begin = var_6_begin_0, end = var_6_end_0, end_mask = var_6_end_mask_0, squeeze_mask = var_6_squeeze_mask_0, x = waveform_1)[name = tensor<string, []>("op_6")];
+    tensor<int32, []> sliding_windows_0_axis_0 = const()[name = tensor<string, []>("sliding_windows_0_axis_0"), val = tensor<int32, []>(0)];
+    tensor<int32, []> sliding_windows_0_size_0 = const()[name = tensor<string, []>("sliding_windows_0_size_0"), val = tensor<int32, []>(400)];
+    tensor<int32, []> sliding_windows_0_stride_0 = const()[name = tensor<string, []>("sliding_windows_0_stride_0"), val = tensor<int32, []>(160)];
+    tensor<fp32, [2998, 400]> sliding_windows_0 = sliding_windows(axis = sliding_windows_0_axis_0, size = sliding_windows_0_size_0, stride = sliding_windows_0_stride_0, x = var_6)[name = tensor<string, []>("sliding_windows_0")];
+    tensor<int32, [1]> var_42_axes_0 = const()[name = tensor<string, []>("op_42_axes_0"), val = tensor<int32, [1]>([1])];
+    tensor<bool, []> var_42_keep_dims_0 = const()[name = tensor<string, []>("op_42_keep_dims_0"), val = tensor<bool, []>(false)];
+    tensor<fp32, [2998]> var_42 = reduce_mean(axes = var_42_axes_0, keep_dims = var_42_keep_dims_0, x = sliding_windows_0)[name = tensor<string, []>("op_42")];
+    tensor<int32, [1]> row_means_axes_0 = const()[name = tensor<string, []>("row_means_axes_0"), val = tensor<int32, [1]>([1])];
+    tensor<fp32, [2998, 1]> row_means = expand_dims(axes = row_means_axes_0, x = var_42)[name = tensor<string, []>("row_means")];
+    tensor<fp32, [2998, 400]> strided_input_3 = sub(x = sliding_windows_0, y = row_means)[name = tensor<string, []>("strided_input_3")];
+    tensor<int32, [1]> input_1_axes_0 = const()[name = tensor<string, []>("input_1_axes_0"), val = tensor<int32, [1]>([0])];
+    tensor<fp32, [1, 2998, 400]> input_1 = expand_dims(axes = input_1_axes_0, x = strided_input_3)[name = tensor<string, []>("input_1")];
+    tensor<fp32, []> const_2 = const()[name = tensor<string, []>("const_2"), val = tensor<fp32, []>(0x0p+0)];
+    tensor<int32, [6]> var_54_pad_0 = const()[name = tensor<string, []>("op_54_pad_0"), val = tensor<int32, [6]>([0, 0, 0, 0, 1, 0])];
+    tensor<string, []> var_54_mode_0 = const()[name = tensor<string, []>("op_54_mode_0"), val = tensor<string, []>("replicate")];
+    tensor<fp32, [1, 2998, 401]> var_54 = pad(constant_val = const_2, mode = var_54_mode_0, pad = var_54_pad_0, x = input_1)[name = tensor<string, []>("op_54")];
+    tensor<int32, [1]> offset_strided_input_axes_0 = const()[name = tensor<string, []>("offset_strided_input_axes_0"), val = tensor<int32, [1]>([0])];
+    tensor<fp32, [2998, 401]> offset_strided_input = squeeze(axes = offset_strided_input_axes_0, x = var_54)[name = tensor<string, []>("offset_strided_input")];
+    tensor<int32, [2]> var_66_begin_0 = const()[name = tensor<string, []>("op_66_begin_0"), val = tensor<int32, [2]>([0, 0])];
+    tensor<int32, [2]> var_66_end_0 = const()[name = tensor<string, []>("op_66_end_0"), val = tensor<int32, [2]>([2998, 400])];
+    tensor<bool, [2]> var_66_end_mask_0 = const()[name = tensor<string, []>("op_66_end_mask_0"), val = tensor<bool, [2]>([true, false])];
+    tensor<fp32, [2998, 400]> var_66 = slice_by_index(begin = var_66_begin_0, end = var_66_end_0, end_mask = var_66_end_mask_0, x = offset_strided_input)[name = tensor<string, []>("op_66")];
+    tensor<fp32, []> var_67 = const()[name = tensor<string, []>("op_67"), val = tensor<fp32, []>(0x1.f0a3d8p-1)];
+    tensor<fp32, [2998, 400]> var_68 = mul(x = var_66, y = var_67)[name = tensor<string, []>("op_68")];
+    tensor<fp32, [2998, 400]> strided_input_5 = sub(x = strided_input_3, y = var_68)[name = tensor<string, []>("strided_input_5")];
+    tensor<fp32, [1, 400]> window_function = const()[name = tensor<string, []>("window_function"), val = tensor<fp32, [1, 400]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(64)))];
+    tensor<fp32, [2998, 400]> strided_input_7 = mul(x = strided_input_5, y = window_function)[name = tensor<string, []>("strided_input_7")];
+    tensor<int32, [1]> input_3_axes_0 = const()[name = tensor<string, []>("input_3_axes_0"), val = tensor<int32, [1]>([0])];
+    tensor<fp32, [1, 2998, 400]> input_3 = expand_dims(axes = input_3_axes_0, x = strided_input_7)[name = tensor<string, []>("input_3")];
+    tensor<fp32, []> const_3 = const()[name = tensor<string, []>("const_3"), val = tensor<fp32, []>(0x0p+0)];
+    tensor<int32, [6]> var_90_pad_0 = const()[name = tensor<string, []>("op_90_pad_0"), val = tensor<int32, [6]>([0, 0, 0, 0, 0, 112])];
+    tensor<string, []> var_90_mode_0 = const()[name = tensor<string, []>("op_90_mode_0"), val = tensor<string, []>("constant")];
+    tensor<fp32, [1, 2998, 512]> var_90 = pad(constant_val = const_3, mode = var_90_mode_0, pad = var_90_pad_0, x = input_3)[name = tensor<string, []>("op_90")];
+    tensor<int32, [1]> strided_input_axes_0 = const()[name = tensor<string, []>("strided_input_axes_0"), val = tensor<int32, [1]>([0])];
+    tensor<fp32, [2998, 512]> strided_input = squeeze(axes = strided_input_axes_0, x = var_90)[name = tensor<string, []>("strided_input")];
+    tensor<fp32, [512, 512]> cos_0 = const()[name = tensor<string, []>("cos_0"), val = tensor<fp32, [512, 512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1728)))];
+    tensor<fp32, [512, 512]> sin_0 = const()[name = tensor<string, []>("sin_0"), val = tensor<fp32, [512, 512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1050368)))];
+    tensor<bool, []> matmul_1_transpose_x_1 = const()[name = tensor<string, []>("matmul_1_transpose_x_1"), val = tensor<bool, []>(false)];
+    tensor<bool, []> matmul_1_transpose_y_1 = const()[name = tensor<string, []>("matmul_1_transpose_y_1"), val = tensor<bool, []>(true)];
+    tensor<fp32, [512, 2998]> matmul_1 = matmul(transpose_x = matmul_1_transpose_x_1, transpose_y = matmul_1_transpose_y_1, x = cos_0, y = strided_input)[name = tensor<string, []>("matmul_1")];
+    tensor<bool, []> matmul_3_transpose_x_1 = const()[name = tensor<string, []>("matmul_3_transpose_x_1"), val = tensor<bool, []>(false)];
+    tensor<bool, []> matmul_3_transpose_y_1 = const()[name = tensor<string, []>("matmul_3_transpose_y_1"), val = tensor<bool, []>(true)];
+    tensor<fp32, [512, 2998]> matmul_3 = matmul(transpose_x = matmul_3_transpose_x_1, transpose_y = matmul_3_transpose_y_1, x = sin_0, y = strided_input)[name = tensor<string, []>("matmul_3")];
+    tensor<fp32, []> mul_1_y_0 = const()[name = tensor<string, []>("mul_1_y_0"), val = tensor<fp32, []>(-0x1p+0)];
+    tensor<fp32, [512, 2998]> mul_1 = mul(x = matmul_3, y = mul_1_y_0)[name = tensor<string, []>("mul_1")];
+    tensor<int32, [2]> transpose_3_perm_0 = const()[name = tensor<string, []>("transpose_3_perm_0"), val = tensor<int32, [2]>([-1, 0])];
+    tensor<int32, [2]> transpose_4_perm_0 = const()[name = tensor<string, []>("transpose_4_perm_0"), val = tensor<int32, [2]>([-1, 0])];
+    tensor<int32, [257]> range_1d_2 = const()[name = tensor<string, []>("range_1d_2"), val = tensor<int32, [257]>([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255, 256])];
+    tensor<int32, []> gather_0_axis_0 = const()[name = tensor<string, []>("gather_0_axis_0"), val = tensor<int32, []>(-1)];
+    tensor<int32, []> gather_0_batch_dims_0 = const()[name = tensor<string, []>("gather_0_batch_dims_0"), val = tensor<int32, []>(0)];
+    tensor<fp32, [2998, 512]> transpose_3 = transpose(perm = transpose_3_perm_0, x = matmul_1)[name = tensor<string, []>("transpose_6")];
+    tensor<fp32, [2998, 257]> gather_0 = gather(axis = gather_0_axis_0, batch_dims = gather_0_batch_dims_0, indices = range_1d_2, x = transpose_3)[name = tensor<string, []>("gather_0")];
+    tensor<int32, []> gather_1_axis_0 = const()[name = tensor<string, []>("gather_1_axis_0"), val = tensor<int32, []>(-1)];
+    tensor<int32, []> gather_1_batch_dims_0 = const()[name = tensor<string, []>("gather_1_batch_dims_0"), val = tensor<int32, []>(0)];
+    tensor<fp32, [2998, 512]> transpose_4 = transpose(perm = transpose_4_perm_0, x = mul_1)[name = tensor<string, []>("transpose_5")];
+    tensor<fp32, [2998, 257]> gather_1 = gather(axis = gather_1_axis_0, batch_dims = gather_1_batch_dims_0, indices = range_1d_2, x = transpose_4)[name = tensor<string, []>("gather_1")];
+    tensor<fp32, [2998, 257]> square_0 = square(x = gather_0)[name = tensor<string, []>("square_0")];
+    tensor<fp32, [2998, 257]> square_1 = square(x = gather_1)[name = tensor<string, []>("square_1")];
+    tensor<fp32, [2998, 257]> add_1 = add(x = square_0, y = square_1)[name = tensor<string, []>("add_1")];
+    tensor<fp32, [2998, 257]> spectrum = identity(x = add_1)[name = tensor<string, []>("spectrum")];
+    tensor<fp32, [80, 257]> mel_energies_3 = const()[name = tensor<string, []>("mel_energies_3"), val = tensor<fp32, [80, 257]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(2099008)))];
+    tensor<fp32, [80]> mel_energies_bias_0 = const()[name = tensor<string, []>("mel_energies_bias_0"), val = tensor<fp32, [80]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(2181312)))];
+    tensor<fp32, [2998, 80]> mel_energies = linear(bias = mel_energies_bias_0, weight = mel_energies_3, x = spectrum)[name = tensor<string, []>("mel_energies")];
+    tensor<fp32, []> const_10 = const()[name = tensor<string, []>("const_10"), val = tensor<fp32, []>(0x1p-23)];
+    tensor<fp32, [2998, 80]> var_186 = maximum(x = mel_energies, y = const_10)[name = tensor<string, []>("op_186")];
+    tensor<fp32, []> filter_banks_epsilon_0 = const()[name = tensor<string, []>("filter_banks_epsilon_0"), val = tensor<fp32, []>(0x1p-149)];
+    tensor<fp32, [2998, 80]> filter_banks = log(epsilon = filter_banks_epsilon_0, x = var_186)[name = tensor<string, []>("filter_banks")];
+    tensor<int32, [1]> var_192_axes_0 = const()[name = tensor<string, []>("op_192_axes_0"), val = tensor<int32, [1]>([0])];
+    tensor<bool, []> var_192_keep_dims_0 = const()[name = tensor<string, []>("op_192_keep_dims_0"), val = tensor<bool, []>(true)];
+    tensor<fp32, [1, 80]> var_192 = reduce_mean(axes = var_192_axes_0, keep_dims = var_192_keep_dims_0, x = filter_banks)[name = tensor<string, []>("op_192")];
+    tensor<fp32, [2998, 80]> var_194 = sub(x = filter_banks, y = var_192)[name = tensor<string, []>("op_194")];
+    tensor<int32, [1]> obj_axes_0 = const()[name = tensor<string, []>("obj_axes_0"), val = tensor<int32, [1]>([0])];
+    tensor<fp32, [1, 2998, 80]> preprocessor_output_1_type_fp32 = expand_dims(axes = obj_axes_0, x = var_194)[name = tensor<string, []>("obj")];
+    tensor<string, []> cast_9_dtype_0 = const()[name = tensor<string, []>("cast_9_dtype_0"), val = tensor<string, []>("fp16")];
+    tensor<fp16, [1, 2998, 80]> preprocessor_output_1 = cast(dtype = cast_9_dtype_0, x = preprocessor_output_1_type_fp32)[name = tensor<string, []>("cast_10")];
+  } -> (preprocessor_output_1);
+}
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5f2c284bd22f1f7ab76901c1c6e57f82d4ebbf057fa0b924aad057f124f77a89
+size 2181696
@@ -0,0 +1,7 @@
+Argmax proprietary and confidential. Under NDA.
+
+Copyright 2025 Argmax, Inc. All rights reserved.
+
+Unauthorized access, copying, use, distribution, and/or commercialization of this file, via any medium or means, is strictly prohibited.
+
+Please contact Argmax for licensing information at [email protected].
@@ -0,0 +1,6 @@
+# License
+
+Original model weights: https://huggingface.co/pyannote/segmentation-3.0/blob/main/LICENSE
+Argmax-optimized model assets (assets with the `.mlmodelc` extension): https://huggingface.co/argmaxinc/speakerkit-pro/blob/main/LICENSE_NOTICE.txt
+
+Please contact [email protected] for licensing SpeakerKit Pro assets.
@@ -0,0 +1,7 @@
+Argmax proprietary and confidential. Under NDA.
+
+Copyright 2025 Argmax, Inc. All rights reserved.
+
+Unauthorized access, copying, use, distribution, and/or commercialization of this file, via any medium or means, is strictly prohibited.
+
+Please contact Argmax for licensing information at [email protected].
@@ -0,0 +1,6 @@
+# License
+
+Original model weights: https://huggingface.co/pyannote/segmentation-3.0/blob/main/LICENSE
+Argmax-optimized model assets (assets with the `.mlmodelc` extension): https://huggingface.co/argmaxinc/speakerkit-pro/blob/main/LICENSE_NOTICE.txt
+
+Please contact [email protected] for licensing SpeakerKit Pro assets.
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2f3593627d0e4a99c4537655e8339aa5f786a175f913194781af7c6ef3b969f7
+size 243
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4d92ffe80d66daa38293970d33fef64924b89b3f8d8ee71606e1a1e03e19d200
+size 519
@@ -0,0 +1,144 @@
+[
+  {
+    "metadataOutputVersion" : "3.0",
+    "storagePrecision" : "Float32",
+    "outputSchema" : [
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float16",
+        "formattedType" : "MultiArray (Float16 21 × 589 × 3)",
+        "shortDescription" : "",
+        "shape" : "[21, 589, 3]",
+        "name" : "speaker_probs",
+        "type" : "MultiArray"
+      },
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float16",
+        "formattedType" : "MultiArray (Float16 21 × 589 × 3)",
+        "shortDescription" : "",
+        "shape" : "[21, 589, 3]",
+        "name" : "speaker_ids",
+        "type" : "MultiArray"
+      },
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float16",
+        "formattedType" : "MultiArray (Float16 21 × 3)",
+        "shortDescription" : "",
+        "shape" : "[21, 3]",
+        "name" : "speaker_activity",
+        "type" : "MultiArray"
+      },
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float16",
+        "formattedType" : "MultiArray (Float16 21 × 589)",
+        "shortDescription" : "",
+        "shape" : "[21, 589]",
+        "name" : "overlapped_speaker_activity",
+        "type" : "MultiArray"
+      },
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float16",
+        "formattedType" : "MultiArray (Float16 1767)",
+        "shortDescription" : "",
+        "shape" : "[1767]",
+        "name" : "voice_activity",
+        "type" : "MultiArray"
+      },
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float16",
+        "formattedType" : "MultiArray (Float16 21 × 1 × 160000)",
+        "shortDescription" : "",
+        "shape" : "[21, 1, 160000]",
+        "name" : "sliding_window_waveform",
+        "type" : "MultiArray"
+      }
+    ],
+    "modelParameters" : [
+
+    ],
+    "specificationVersion" : 7,
+    "mlProgramOperationTypeHistogram" : {
+      "Transpose" : 2,
+      "Ios16.maxPool" : 3,
+      "Ios16.exp" : 1,
+      "Ios16.softmax" : 1,
+      "SlidingWindows" : 1,
+      "Ios16.linear" : 5,
+      "Ios16.add" : 40,
+      "Ios16.realDiv" : 1,
+      "Ios16.reduceMax" : 1,
+      "Ios16.reduceSum" : 2,
+      "Ios16.reduceArgmax" : 1,
+      "Ios16.greater" : 1,
+      "Ios16.log" : 1,
+      "ExpandDims" : 1,
+      "Ios16.instanceNorm" : 4,
+      "Ios16.sub" : 1,
+      "Ios16.cast" : 9,
+      "Ios16.conv" : 3,
+      "Ios16.lstm" : 4,
+      "OneHot" : 1,
+      "SliceByIndex" : 61,
+      "Ios16.abs" : 1,
+      "Ios16.scatter" : 42,
+      "Ios16.mul" : 1,
+      "Ios16.leakyRelu" : 5
+    },
+    "computePrecision" : "Mixed (Float16, Float32, Int32)",
+    "isUpdatable" : "0",
+    "stateSchema" : [
+
+    ],
+    "availability" : {
+      "macOS" : "13.0",
+      "tvOS" : "16.0",
+      "visionOS" : "1.0",
+      "watchOS" : "9.0",
+      "iOS" : "16.0",
+      "macCatalyst" : "16.0"
+    },
+    "modelType" : {
+      "name" : "MLModelType_mlProgram"
+    },
+    "userDefinedMetadata" : {
+      "com.github.apple.coremltools.source_dialect" : "TorchScript",
+      "com.github.apple.coremltools.version" : "8.2",
+      "com.github.apple.coremltools.source" : "torch==2.6.0"
+    },
+    "inputSchema" : [
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float16",
+        "formattedType" : "MultiArray (Float16 480000)",
+        "shortDescription" : "",
+        "shape" : "[480000]",
+        "name" : "waveform",
+        "type" : "MultiArray"
+      },
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float16",
+        "formattedType" : "MultiArray (Float16 7)",
+        "shortDescription" : "",
+        "shape" : "[7]",
+        "name" : "input_1",
+        "type" : "MultiArray"
+      }
+    ],
+    "generatedClassName" : "SpeakerSegmenter",
+    "method" : "predict"
+  }
+]
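
Per this schema, the segmenter takes a full 30 s waveform (480000 samples at 16 kHz) plus a 7-element `input_1` vector and windows internally: the [21, 1, 160000] `sliding_window_waveform` output is consistent with 21 overlapping 10 s windows at a 1 s hop, and `voice_activity`'s 1767 frames equal 3 × 589, i.e. the per-window frame rate carried across the whole 30 s. A usage sketch under those shapes — the semantics of `input_1` are not documented in this diff, so a zero vector stands in:

```python
# Sketch: drive the W32A32 segmenter using the input/output names and shapes
# from the metadata.json above. The local path and the zeroed input_1 vector
# are assumptions, not SpeakerKit's documented usage.
import numpy as np
import coremltools as ct

seg = ct.models.CompiledMLModel(
    "speaker_segmenter/pyannote-v3-pro/W32A32/SpeakerSegmenter.mlmodelc"
)
audio = np.zeros(480000, dtype=np.float16)   # 30 s of 16 kHz audio
params = np.zeros(7, dtype=np.float16)       # placeholder for input_1
out = seg.predict({"waveform": audio, "input_1": params})
probs = out["speaker_probs"]                 # (21 windows, 589 frames, 3 speakers)
vad = out["voice_activity"]                  # (1767,) frame-level speech activity
```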
The diff for this file is too large to render; see the raw diff.
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e76cb6e08ccbc6a007135fca00179f5c72ed6b0878e1584b129614388ee6909b
+size 5990900
@@ -0,0 +1,7 @@
+Argmax proprietary and confidential. Under NDA.
+
+Copyright 2025 Argmax, Inc. All rights reserved.
+
+Unauthorized access, copying, use, distribution, and/or commercialization of this file, via any medium or means, is strictly prohibited.
+
+Please contact Argmax for licensing information at [email protected].
@@ -0,0 +1,6 @@
+# License
+
+Original model weights: https://huggingface.co/pyannote/segmentation-3.0/blob/main/LICENSE
+Argmax-optimized model assets (assets with the `.mlmodelc` extension): https://huggingface.co/argmaxinc/speakerkit-pro/blob/main/LICENSE_NOTICE.txt
+
+Please contact [email protected] for licensing SpeakerKit Pro assets.
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:25923d134e4f141b7e5284d06144f7b5961eb43b3a7027495bb54e322706570b
+size 243
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4d92ffe80d66daa38293970d33fef64924b89b3f8d8ee71606e1a1e03e19d200
+size 519
@@ -0,0 +1,145 @@
+[
+  {
+    "metadataOutputVersion" : "3.0",
+    "storagePrecision" : "Mixed (Float16, Float32, Palettized (8 bits))",
+    "outputSchema" : [
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float16",
+        "formattedType" : "MultiArray (Float16 21 × 589 × 3)",
+        "shortDescription" : "",
+        "shape" : "[21, 589, 3]",
+        "name" : "speaker_probs",
+        "type" : "MultiArray"
+      },
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float16",
+        "formattedType" : "MultiArray (Float16 21 × 589 × 3)",
+        "shortDescription" : "",
+        "shape" : "[21, 589, 3]",
+        "name" : "speaker_ids",
+        "type" : "MultiArray"
+      },
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float16",
+        "formattedType" : "MultiArray (Float16 21 × 3)",
+        "shortDescription" : "",
+        "shape" : "[21, 3]",
+        "name" : "speaker_activity",
+        "type" : "MultiArray"
+      },
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float16",
+        "formattedType" : "MultiArray (Float16 21 × 589)",
+        "shortDescription" : "",
+        "shape" : "[21, 589]",
+        "name" : "overlapped_speaker_activity",
+        "type" : "MultiArray"
+      },
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float16",
+        "formattedType" : "MultiArray (Float16 1767)",
+        "shortDescription" : "",
+        "shape" : "[1767]",
+        "name" : "voice_activity",
+        "type" : "MultiArray"
+      },
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float16",
+        "formattedType" : "MultiArray (Float16 21 × 1 × 160000)",
+        "shortDescription" : "",
+        "shape" : "[21, 1, 160000]",
+        "name" : "sliding_window_waveform",
+        "type" : "MultiArray"
+      }
+    ],
+    "modelParameters" : [
+
+    ],
+    "specificationVersion" : 7,
+    "mlProgramOperationTypeHistogram" : {
+      "Transpose" : 2,
+      "Ios16.maxPool" : 3,
+      "Ios16.exp" : 1,
+      "Ios16.softmax" : 1,
+      "SlidingWindows" : 1,
+      "Ios16.linear" : 5,
+      "Ios16.add" : 40,
+      "Ios16.realDiv" : 1,
+      "Ios16.reduceMax" : 1,
+      "Ios16.reduceSum" : 2,
+      "Ios16.reduceArgmax" : 1,
+      "Ios16.greater" : 1,
+      "Ios16.log" : 1,
+      "ExpandDims" : 1,
+      "Ios16.instanceNorm" : 4,
+      "Ios16.sub" : 1,
+      "Ios16.cast" : 4,
+      "Ios16.conv" : 3,
+      "Ios16.constexprLutToDense" : 22,
+      "OneHot" : 1,
+      "Ios16.abs" : 1,
+      "Ios16.lstm" : 4,
+      "SliceByIndex" : 61,
+      "Ios16.scatter" : 42,
+      "Ios16.mul" : 1,
+      "Ios16.leakyRelu" : 5
+    },
+    "computePrecision" : "Mixed (Float16, Float32, Int32)",
+    "isUpdatable" : "0",
+    "stateSchema" : [
+
+    ],
+    "availability" : {
+      "macOS" : "13.0",
+      "tvOS" : "16.0",
+      "visionOS" : "1.0",
+      "watchOS" : "9.0",
+      "iOS" : "16.0",
+      "macCatalyst" : "16.0"
+    },
+    "modelType" : {
+      "name" : "MLModelType_mlProgram"
+    },
+    "userDefinedMetadata" : {
+      "com.github.apple.coremltools.source_dialect" : "TorchScript",
+      "com.github.apple.coremltools.version" : "8.2",
+      "com.github.apple.coremltools.source" : "torch==2.6.0"
+    },
+    "inputSchema" : [
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float16",
+        "formattedType" : "MultiArray (Float16 480000)",
+        "shortDescription" : "",
+        "shape" : "[480000]",
+        "name" : "waveform",
+        "type" : "MultiArray"
+      },
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float16",
+        "formattedType" : "MultiArray (Float16 7)",
+        "shortDescription" : "",
+        "shape" : "[7]",
+        "name" : "input_1",
+        "type" : "MultiArray"
+      }
+    ],
+    "generatedClassName" : "SpeakerSegmenter_8_bit",
+    "method" : "predict"
+  }
+]
The diff for this file is too large to render; see the raw diff.
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:18aa76153cdb04fc74b8b3ab438d96d0f297aba084f4ebc7ea2b7dcc611c1838
+size 1537882