arda-argmax committed
Commit ab5bc64 · verified · 1 parent: a6fb336

New pyannote-v3-pro (#1)


- New pyannote-v3-pro (8c02f5e91862b3bf8a4e23ca79c301196423d673)
- New pyannote-v3-pro SpeakerSegmenter (5ea9592772d00d547da99ae3f7e5c958c78deb9f)
- Update speaker_embedder/pyannote-v3-pro/LICENSE_NOTICE.txt (c5a4c6722d64a288af1864a70f91a2930cf1aa08)
- Update speaker_embedder/pyannote-v3-pro/W16A16/LICENSE_NOTICE.txt (374499d2f345de69a51a7c2567e0e462514ff7b8)
- Update speaker_embedder/pyannote-v3-pro/W8A16/LICENSE_NOTICE.txt (f1960989a5088756b8cbfdde1fa9c75bd7c1adcd)
- Update speaker_segmenter/pyannote-v3-pro/LICENSE_NOTICE.txt (f6b4119078515307b830d24254f50a766df950fd)
- Update speaker_segmenter/pyannote-v3-pro/W32A32/LICENSE_NOTICE.txt (b6731a299fd0f81f5617aaf6f246d62722ddafa2)
- Update speaker_segmenter/pyannote-v3-pro/W8A16/LICENSE_NOTICE.txt (d82d6c440f605ad1dcb58d7925aac3a233118131)
- Delete speaker_embedder/pyannote-v3-pro/W16A16/SpeakerEmbeddingPreprocessor.mlmodelc (c79f308c1ca50c9e24487d0c18f43820c06d2b1b)
- Delete speaker_embedder/pyannote-v3-pro/W8A16/SpeakerEmbeddingPreprocessor.mlmodelc (9b92fca14e181a18c573d15e92f3c15ee45ddb3a)
- pyannote-v3-pro W16A16 SpeakerEmbedderPreprocessor (acdfedd1ec4e0a8802af60c32a3e41dd115e0494)
- pyannote-v3-pro W8A16 SpeakerEmbedderPreprocessor (29c808ac698e3a6c48f752a05162321f1883f57e)

Files changed (42)
  1. speaker_embedder/pyannote-v3-pro/LICENSE_NOTICE.txt +7 -0
  2. speaker_embedder/pyannote-v3-pro/README.txt +6 -0
  3. speaker_embedder/pyannote-v3-pro/W16A16/LICENSE_NOTICE.txt +7 -0
  4. speaker_embedder/pyannote-v3-pro/W16A16/README.txt +6 -0
  5. speaker_embedder/pyannote-v3-pro/W16A16/SpeakerEmbedder.mlmodelc/analytics/coremldata.bin +3 -0
  6. speaker_embedder/pyannote-v3-pro/W16A16/SpeakerEmbedder.mlmodelc/coremldata.bin +3 -0
  7. speaker_embedder/pyannote-v3-pro/W16A16/SpeakerEmbedder.mlmodelc/metadata.json +86 -0
  8. speaker_embedder/pyannote-v3-pro/W16A16/SpeakerEmbedder.mlmodelc/model.mil +0 -0
  9. speaker_embedder/pyannote-v3-pro/W16A16/SpeakerEmbedder.mlmodelc/weights/weight.bin +3 -0
  10. speaker_embedder/pyannote-v3-pro/W16A16/SpeakerEmbedderPreprocessor.mlmodelc/analytics/coremldata.bin +3 -0
  11. speaker_embedder/pyannote-v3-pro/W16A16/SpeakerEmbedderPreprocessor.mlmodelc/coremldata.bin +3 -0
  12. speaker_embedder/pyannote-v3-pro/W16A16/SpeakerEmbedderPreprocessor.mlmodelc/metadata.json +77 -0
  13. speaker_embedder/pyannote-v3-pro/W16A16/SpeakerEmbedderPreprocessor.mlmodelc/model.mil +90 -0
  14. speaker_embedder/pyannote-v3-pro/W16A16/SpeakerEmbedderPreprocessor.mlmodelc/weights/weight.bin +3 -0
  15. speaker_embedder/pyannote-v3-pro/W8A16/LICENSE_NOTICE.txt +7 -0
  16. speaker_embedder/pyannote-v3-pro/W8A16/README.txt +6 -0
  17. speaker_embedder/pyannote-v3-pro/W8A16/SpeakerEmbedder.mlmodelc/analytics/coremldata.bin +3 -0
  18. speaker_embedder/pyannote-v3-pro/W8A16/SpeakerEmbedder.mlmodelc/coremldata.bin +3 -0
  19. speaker_embedder/pyannote-v3-pro/W8A16/SpeakerEmbedder.mlmodelc/metadata.json +87 -0
  20. speaker_embedder/pyannote-v3-pro/W8A16/SpeakerEmbedder.mlmodelc/model.mil +0 -0
  21. speaker_embedder/pyannote-v3-pro/W8A16/SpeakerEmbedder.mlmodelc/weights/weight.bin +3 -0
  22. speaker_embedder/pyannote-v3-pro/W8A16/SpeakerEmbedderPreprocessor.mlmodelc/analytics/coremldata.bin +3 -0
  23. speaker_embedder/pyannote-v3-pro/W8A16/SpeakerEmbedderPreprocessor.mlmodelc/coremldata.bin +3 -0
  24. speaker_embedder/pyannote-v3-pro/W8A16/SpeakerEmbedderPreprocessor.mlmodelc/metadata.json +77 -0
  25. speaker_embedder/pyannote-v3-pro/W8A16/SpeakerEmbedderPreprocessor.mlmodelc/model.mil +90 -0
  26. speaker_embedder/pyannote-v3-pro/W8A16/SpeakerEmbedderPreprocessor.mlmodelc/weights/weight.bin +3 -0
  27. speaker_segmenter/pyannote-v3-pro/LICENSE_NOTICE.txt +7 -0
  28. speaker_segmenter/pyannote-v3-pro/README.txt +6 -0
  29. speaker_segmenter/pyannote-v3-pro/W32A32/LICENSE_NOTICE.txt +7 -0
  30. speaker_segmenter/pyannote-v3-pro/W32A32/README.txt +6 -0
  31. speaker_segmenter/pyannote-v3-pro/W32A32/SpeakerSegmenter.mlmodelc/analytics/coremldata.bin +3 -0
  32. speaker_segmenter/pyannote-v3-pro/W32A32/SpeakerSegmenter.mlmodelc/coremldata.bin +3 -0
  33. speaker_segmenter/pyannote-v3-pro/W32A32/SpeakerSegmenter.mlmodelc/metadata.json +144 -0
  34. speaker_segmenter/pyannote-v3-pro/W32A32/SpeakerSegmenter.mlmodelc/model.mil +0 -0
  35. speaker_segmenter/pyannote-v3-pro/W32A32/SpeakerSegmenter.mlmodelc/weights/weight.bin +3 -0
  36. speaker_segmenter/pyannote-v3-pro/W8A16/LICENSE_NOTICE.txt +7 -0
  37. speaker_segmenter/pyannote-v3-pro/W8A16/README.txt +6 -0
  38. speaker_segmenter/pyannote-v3-pro/W8A16/SpeakerSegmenter.mlmodelc/analytics/coremldata.bin +3 -0
  39. speaker_segmenter/pyannote-v3-pro/W8A16/SpeakerSegmenter.mlmodelc/coremldata.bin +3 -0
  40. speaker_segmenter/pyannote-v3-pro/W8A16/SpeakerSegmenter.mlmodelc/metadata.json +145 -0
  41. speaker_segmenter/pyannote-v3-pro/W8A16/SpeakerSegmenter.mlmodelc/model.mil +0 -0
  42. speaker_segmenter/pyannote-v3-pro/W8A16/SpeakerSegmenter.mlmodelc/weights/weight.bin +3 -0
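
The per-file diffs follow. For reference, the assets added here can be fetched selectively from the Hub instead of cloning the whole repository. A minimal sketch, assuming the repo id `argmaxinc/speakerkit-pro` (inferred from the LICENSE_NOTICE URLs in this commit) and that your token has access to this NDA-gated repo:

```python
# Sketch: selectively download the pyannote-v3-pro assets added in this commit.
# Assumes access to the NDA-gated argmaxinc/speakerkit-pro repo (repo id
# inferred from the LICENSE_NOTICE URLs below).
from huggingface_hub import snapshot_download

local_dir = snapshot_download(
    repo_id="argmaxinc/speakerkit-pro",
    allow_patterns=[
        "speaker_embedder/pyannote-v3-pro/**",
        "speaker_segmenter/pyannote-v3-pro/**",
    ],
)
print(local_dir)  # local path containing the .mlmodelc bundles
```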
speaker_embedder/pyannote-v3-pro/LICENSE_NOTICE.txt ADDED
@@ -0,0 +1,7 @@
+ Argmax proprietary and confidential. Under NDA.
+
+ Copyright 2025 Argmax, Inc. All rights reserved.
+
+ Unauthorized access, copying, use, distribution, and or commercialization of this file, via any medium or means is strictly prohibited.
+
+ Please contact Argmax for licensing information at [email protected].
speaker_embedder/pyannote-v3-pro/README.txt ADDED
@@ -0,0 +1,6 @@
+ # License
+
+ Original model weights: https://github.com/wenet-e2e/wespeaker/blob/master/docs/pretrained.md#model-license
+ Argmax-optimized model asset (Assets with `.mlmodelc` extension): https://huggingface.co/argmaxinc/speakerkit-pro/blob/main/LICENSE_NOTICE.txt
+
+ Please contact [email protected] for licensing SpeakerKit Pro assets
speaker_embedder/pyannote-v3-pro/W16A16/LICENSE_NOTICE.txt ADDED
@@ -0,0 +1,7 @@
+ Argmax proprietary and confidential. Under NDA.
+
+ Copyright 2025 Argmax, Inc. All rights reserved.
+
+ Unauthorized access, copying, use, distribution, and or commercialization of this file, via any medium or means is strictly prohibited.
+
+ Please contact Argmax for licensing information at [email protected].
speaker_embedder/pyannote-v3-pro/W16A16/README.txt ADDED
@@ -0,0 +1,6 @@
+ # License
+
+ Original model weights: https://github.com/wenet-e2e/wespeaker/blob/master/docs/pretrained.md#model-license
+ Argmax-optimized model asset (Assets with `.mlmodelc` extension): https://huggingface.co/argmaxinc/speakerkit-pro/blob/main/LICENSE_NOTICE.txt
+
+ Please contact [email protected] for licensing SpeakerKit Pro assets
speaker_embedder/pyannote-v3-pro/W16A16/SpeakerEmbedder.mlmodelc/analytics/coremldata.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:31ebd86f9f3a87ee2bb1aa32722968e3f6821d6b393171bb6b92683213f173e1
+ size 243
speaker_embedder/pyannote-v3-pro/W16A16/SpeakerEmbedder.mlmodelc/coremldata.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:1bbb69d66cdb7f69a6c9d4dc1ff9b114d6097bb69581e12a706cf16e27ce10be
+ size 370
speaker_embedder/pyannote-v3-pro/W16A16/SpeakerEmbedder.mlmodelc/metadata.json ADDED
@@ -0,0 +1,86 @@
+ [
+   {
+     "metadataOutputVersion" : "3.0",
+     "storagePrecision" : "Float16",
+     "outputSchema" : [
+       {
+         "hasShapeFlexibility" : "0",
+         "isOptional" : "0",
+         "dataType" : "Float16",
+         "formattedType" : "MultiArray (Float16 1 × 64 × 256)",
+         "shortDescription" : "",
+         "shape" : "[1, 64, 256]",
+         "name" : "speaker_embeddings",
+         "type" : "MultiArray"
+       }
+     ],
+     "modelParameters" : [
+
+     ],
+     "specificationVersion" : 7,
+     "mlProgramOperationTypeHistogram" : {
+       "Concat" : 64,
+       "Ios16.mul" : 256,
+       "SliceByIndex" : 64,
+       "Transpose" : 1,
+       "Ios16.sub" : 128,
+       "Ios16.sqrt" : 64,
+       "Stack" : 1,
+       "UpsampleNearestNeighbor" : 1,
+       "Ios16.conv" : 36,
+       "Ios16.add" : 144,
+       "Squeeze" : 1,
+       "Ios16.relu" : 33,
+       "Ios16.realDiv" : 192,
+       "Ios16.reduceSum" : 256,
+       "ExpandDims" : 130,
+       "Ios16.linear" : 1,
+       "Ios16.reshape" : 1
+     },
+     "computePrecision" : "Mixed (Float16, Float32, Int32)",
+     "isUpdatable" : "0",
+     "stateSchema" : [
+
+     ],
+     "availability" : {
+       "macOS" : "13.0",
+       "tvOS" : "16.0",
+       "visionOS" : "1.0",
+       "watchOS" : "9.0",
+       "iOS" : "16.0",
+       "macCatalyst" : "16.0"
+     },
+     "modelType" : {
+       "name" : "MLModelType_mlProgram"
+     },
+     "userDefinedMetadata" : {
+       "com.github.apple.coremltools.source_dialect" : "TorchScript",
+       "com.github.apple.coremltools.version" : "8.2",
+       "com.github.apple.coremltools.source" : "torch==2.6.0"
+     },
+     "inputSchema" : [
+       {
+         "hasShapeFlexibility" : "0",
+         "isOptional" : "0",
+         "dataType" : "Float16",
+         "formattedType" : "MultiArray (Float16 1 × 998 × 80)",
+         "shortDescription" : "",
+         "shape" : "[1, 998, 80]",
+         "name" : "preprocessor_output_1",
+         "type" : "MultiArray"
+       },
+       {
+         "hasShapeFlexibility" : "0",
+         "isOptional" : "0",
+         "dataType" : "Float16",
+         "formattedType" : "MultiArray (Float16 1 × 64 × 589)",
+         "shortDescription" : "",
+         "shape" : "[1, 64, 589]",
+         "name" : "speaker_masks",
+         "type" : "MultiArray"
+       }
+     ],
+     "generatedClassName" : "SpeakerEmbedding",
+     "method" : "predict"
+   }
+ ]
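
The schema above defines the embedder's contract: it takes 80-dim fbank features for a ~10 s window (`preprocessor_output_1`, `[1, 998, 80]`) plus 64 per-speaker frame masks (`speaker_masks`, `[1, 64, 589]`) and returns one 256-dim embedding per mask slot (`speaker_embeddings`, `[1, 64, 256]`). A minimal sketch exercising that contract with coremltools' `CompiledMLModel` (loads `.mlmodelc` bundles; macOS only, coremltools ≥ 6); the zero inputs only demonstrate names and shapes, not a meaningful result:

```python
# Sketch: exercise the SpeakerEmbedder I/O contract from the metadata above.
import numpy as np
from coremltools.models import CompiledMLModel

embedder = CompiledMLModel(
    "speaker_embedder/pyannote-v3-pro/W16A16/SpeakerEmbedder.mlmodelc"
)
out = embedder.predict({
    # fbank features for one ~10 s window (placeholder zeros)
    "preprocessor_output_1": np.zeros((1, 998, 80), dtype=np.float16),
    # per-speaker frame masks, up to 64 slots (placeholder zeros)
    "speaker_masks": np.zeros((1, 64, 589), dtype=np.float16),
})
print(out["speaker_embeddings"].shape)  # (1, 64, 256): one embedding per mask slot
```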
speaker_embedder/pyannote-v3-pro/W16A16/SpeakerEmbedder.mlmodelc/model.mil ADDED
The diff for this file is too large to render. See raw diff
 
speaker_embedder/pyannote-v3-pro/W16A16/SpeakerEmbedder.mlmodelc/weights/weight.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:6dba18a57a81b1e872802ca4def29541bb7900ccff430d9b2040092cadd7d688
+ size 13264960
speaker_embedder/pyannote-v3-pro/W16A16/SpeakerEmbedderPreprocessor.mlmodelc/analytics/coremldata.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:1383750df1af99af002f0c3403bcf19a18c3d749706eb3498d34b0fe01abf2fc
+ size 243
speaker_embedder/pyannote-v3-pro/W16A16/SpeakerEmbedderPreprocessor.mlmodelc/coremldata.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:dc230803421bf4fe14f843ea5b5fa0035487fd19cdd69ed670d72560b6a44586
+ size 330
speaker_embedder/pyannote-v3-pro/W16A16/SpeakerEmbedderPreprocessor.mlmodelc/metadata.json ADDED
@@ -0,0 +1,77 @@
+ [
+   {
+     "metadataOutputVersion" : "3.0",
+     "storagePrecision" : "Float32",
+     "outputSchema" : [
+       {
+         "hasShapeFlexibility" : "0",
+         "isOptional" : "0",
+         "dataType" : "Float16",
+         "formattedType" : "MultiArray (Float16 1 × 2998 × 80)",
+         "shortDescription" : "",
+         "shape" : "[1, 2998, 80]",
+         "name" : "preprocessor_output_1",
+         "type" : "MultiArray"
+       }
+     ],
+     "modelParameters" : [
+
+     ],
+     "specificationVersion" : 7,
+     "mlProgramOperationTypeHistogram" : {
+       "Ios16.cast" : 2,
+       "Ios16.mul" : 4,
+       "SliceByIndex" : 2,
+       "Transpose" : 2,
+       "SlidingWindows" : 1,
+       "Ios16.sub" : 3,
+       "Ios16.log" : 1,
+       "Ios16.reduceMean" : 2,
+       "Ios16.square" : 2,
+       "Squeeze" : 2,
+       "Ios16.matmul" : 2,
+       "Ios16.add" : 1,
+       "Ios16.linear" : 1,
+       "ExpandDims" : 4,
+       "Ios16.gather" : 2,
+       "Ios16.maximum" : 1,
+       "Identity" : 1,
+       "Pad" : 2
+     },
+     "computePrecision" : "Mixed (Float16, Float32, Int32)",
+     "isUpdatable" : "0",
+     "stateSchema" : [
+
+     ],
+     "availability" : {
+       "macOS" : "13.0",
+       "tvOS" : "16.0",
+       "visionOS" : "1.0",
+       "watchOS" : "9.0",
+       "iOS" : "16.0",
+       "macCatalyst" : "16.0"
+     },
+     "modelType" : {
+       "name" : "MLModelType_mlProgram"
+     },
+     "userDefinedMetadata" : {
+       "com.github.apple.coremltools.source_dialect" : "TorchScript",
+       "com.github.apple.coremltools.source" : "torch==2.6.0",
+       "com.github.apple.coremltools.version" : "8.2"
+     },
+     "inputSchema" : [
+       {
+         "hasShapeFlexibility" : "0",
+         "isOptional" : "0",
+         "dataType" : "Float16",
+         "formattedType" : "MultiArray (Float16 1 × 480000)",
+         "shortDescription" : "",
+         "shape" : "[1, 480000]",
+         "name" : "waveforms",
+         "type" : "MultiArray"
+       }
+     ],
+     "generatedClassName" : "SpeakerEmbeddingPreprocessor",
+     "method" : "predict"
+   }
+ ]
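
The shapes here are internally consistent: 480000 samples is 30 s at 16 kHz, and with the 400-sample (25 ms) window and 160-sample (10 ms) hop visible in the model.mil that follows, floor((480000 − 400) / 160) + 1 = 2998 frames of 80 mel bins. The embedder above consumes 998-frame (~10 s) feature windows, the same arithmetic applied to 160000 samples, so the 2998-frame output is presumably sliced into ~10 s windows downstream. A quick check:

```python
# Sketch: frame-count arithmetic implied by the schemas above
# (16 kHz audio; 400-sample window and 160-sample hop per the model.mil below).
win, hop = 400, 160
frames = lambda n: (n - win) // hop + 1
assert frames(480000) == 2998  # preprocessor_output_1: [1, 2998, 80] (30 s)
assert frames(160000) == 998   # embedder input window: [1, 998, 80] (~10 s)
```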
speaker_embedder/pyannote-v3-pro/W16A16/SpeakerEmbedderPreprocessor.mlmodelc/model.mil ADDED
@@ -0,0 +1,90 @@
+ program(1.0)
+ [buildInfo = dict<tensor<string, []>, tensor<string, []>>({{"coremlc-component-MIL", "3402.3.2"}, {"coremlc-version", "3402.4.1"}, {"coremltools-component-torch", "2.6.0"}, {"coremltools-source-dialect", "TorchScript"}, {"coremltools-version", "8.2"}})]
+ {
+     func main<ios16>(tensor<fp16, [1, 480000]> waveforms) {
+         tensor<string, []> cast_0_dtype_0 = const()[name = tensor<string, []>("cast_0_dtype_0"), val = tensor<string, []>("fp32")];
+         tensor<fp32, []> var_2_promoted = const()[name = tensor<string, []>("op_2_promoted"), val = tensor<fp32, []>(0x1p+15)];
+         tensor<fp32, [1, 480000]> cast_0 = cast(dtype = cast_0_dtype_0, x = waveforms)[name = tensor<string, []>("cast_11")];
+         tensor<fp32, [1, 480000]> waveform_1 = mul(x = cast_0, y = var_2_promoted)[name = tensor<string, []>("waveform_1")];
+         tensor<int32, [2]> var_6_begin_0 = const()[name = tensor<string, []>("op_6_begin_0"), val = tensor<int32, [2]>([0, 0])];
+         tensor<int32, [2]> var_6_end_0 = const()[name = tensor<string, []>("op_6_end_0"), val = tensor<int32, [2]>([1, 480000])];
+         tensor<bool, [2]> var_6_end_mask_0 = const()[name = tensor<string, []>("op_6_end_mask_0"), val = tensor<bool, [2]>([false, true])];
+         tensor<bool, [2]> var_6_squeeze_mask_0 = const()[name = tensor<string, []>("op_6_squeeze_mask_0"), val = tensor<bool, [2]>([true, false])];
+         tensor<fp32, [480000]> var_6 = slice_by_index(begin = var_6_begin_0, end = var_6_end_0, end_mask = var_6_end_mask_0, squeeze_mask = var_6_squeeze_mask_0, x = waveform_1)[name = tensor<string, []>("op_6")];
+         tensor<int32, []> sliding_windows_0_axis_0 = const()[name = tensor<string, []>("sliding_windows_0_axis_0"), val = tensor<int32, []>(0)];
+         tensor<int32, []> sliding_windows_0_size_0 = const()[name = tensor<string, []>("sliding_windows_0_size_0"), val = tensor<int32, []>(400)];
+         tensor<int32, []> sliding_windows_0_stride_0 = const()[name = tensor<string, []>("sliding_windows_0_stride_0"), val = tensor<int32, []>(160)];
+         tensor<fp32, [2998, 400]> sliding_windows_0 = sliding_windows(axis = sliding_windows_0_axis_0, size = sliding_windows_0_size_0, stride = sliding_windows_0_stride_0, x = var_6)[name = tensor<string, []>("sliding_windows_0")];
+         tensor<int32, [1]> var_42_axes_0 = const()[name = tensor<string, []>("op_42_axes_0"), val = tensor<int32, [1]>([1])];
+         tensor<bool, []> var_42_keep_dims_0 = const()[name = tensor<string, []>("op_42_keep_dims_0"), val = tensor<bool, []>(false)];
+         tensor<fp32, [2998]> var_42 = reduce_mean(axes = var_42_axes_0, keep_dims = var_42_keep_dims_0, x = sliding_windows_0)[name = tensor<string, []>("op_42")];
+         tensor<int32, [1]> row_means_axes_0 = const()[name = tensor<string, []>("row_means_axes_0"), val = tensor<int32, [1]>([1])];
+         tensor<fp32, [2998, 1]> row_means = expand_dims(axes = row_means_axes_0, x = var_42)[name = tensor<string, []>("row_means")];
+         tensor<fp32, [2998, 400]> strided_input_3 = sub(x = sliding_windows_0, y = row_means)[name = tensor<string, []>("strided_input_3")];
+         tensor<int32, [1]> input_1_axes_0 = const()[name = tensor<string, []>("input_1_axes_0"), val = tensor<int32, [1]>([0])];
+         tensor<fp32, [1, 2998, 400]> input_1 = expand_dims(axes = input_1_axes_0, x = strided_input_3)[name = tensor<string, []>("input_1")];
+         tensor<fp32, []> const_2 = const()[name = tensor<string, []>("const_2"), val = tensor<fp32, []>(0x0p+0)];
+         tensor<int32, [6]> var_54_pad_0 = const()[name = tensor<string, []>("op_54_pad_0"), val = tensor<int32, [6]>([0, 0, 0, 0, 1, 0])];
+         tensor<string, []> var_54_mode_0 = const()[name = tensor<string, []>("op_54_mode_0"), val = tensor<string, []>("replicate")];
+         tensor<fp32, [1, 2998, 401]> var_54 = pad(constant_val = const_2, mode = var_54_mode_0, pad = var_54_pad_0, x = input_1)[name = tensor<string, []>("op_54")];
+         tensor<int32, [1]> offset_strided_input_axes_0 = const()[name = tensor<string, []>("offset_strided_input_axes_0"), val = tensor<int32, [1]>([0])];
+         tensor<fp32, [2998, 401]> offset_strided_input = squeeze(axes = offset_strided_input_axes_0, x = var_54)[name = tensor<string, []>("offset_strided_input")];
+         tensor<int32, [2]> var_66_begin_0 = const()[name = tensor<string, []>("op_66_begin_0"), val = tensor<int32, [2]>([0, 0])];
+         tensor<int32, [2]> var_66_end_0 = const()[name = tensor<string, []>("op_66_end_0"), val = tensor<int32, [2]>([2998, 400])];
+         tensor<bool, [2]> var_66_end_mask_0 = const()[name = tensor<string, []>("op_66_end_mask_0"), val = tensor<bool, [2]>([true, false])];
+         tensor<fp32, [2998, 400]> var_66 = slice_by_index(begin = var_66_begin_0, end = var_66_end_0, end_mask = var_66_end_mask_0, x = offset_strided_input)[name = tensor<string, []>("op_66")];
+         tensor<fp32, []> var_67 = const()[name = tensor<string, []>("op_67"), val = tensor<fp32, []>(0x1.f0a3d8p-1)];
+         tensor<fp32, [2998, 400]> var_68 = mul(x = var_66, y = var_67)[name = tensor<string, []>("op_68")];
+         tensor<fp32, [2998, 400]> strided_input_5 = sub(x = strided_input_3, y = var_68)[name = tensor<string, []>("strided_input_5")];
+         tensor<fp32, [1, 400]> window_function = const()[name = tensor<string, []>("window_function"), val = tensor<fp32, [1, 400]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(64)))];
+         tensor<fp32, [2998, 400]> strided_input_7 = mul(x = strided_input_5, y = window_function)[name = tensor<string, []>("strided_input_7")];
+         tensor<int32, [1]> input_3_axes_0 = const()[name = tensor<string, []>("input_3_axes_0"), val = tensor<int32, [1]>([0])];
+         tensor<fp32, [1, 2998, 400]> input_3 = expand_dims(axes = input_3_axes_0, x = strided_input_7)[name = tensor<string, []>("input_3")];
+         tensor<fp32, []> const_3 = const()[name = tensor<string, []>("const_3"), val = tensor<fp32, []>(0x0p+0)];
+         tensor<int32, [6]> var_90_pad_0 = const()[name = tensor<string, []>("op_90_pad_0"), val = tensor<int32, [6]>([0, 0, 0, 0, 0, 112])];
+         tensor<string, []> var_90_mode_0 = const()[name = tensor<string, []>("op_90_mode_0"), val = tensor<string, []>("constant")];
+         tensor<fp32, [1, 2998, 512]> var_90 = pad(constant_val = const_3, mode = var_90_mode_0, pad = var_90_pad_0, x = input_3)[name = tensor<string, []>("op_90")];
+         tensor<int32, [1]> strided_input_axes_0 = const()[name = tensor<string, []>("strided_input_axes_0"), val = tensor<int32, [1]>([0])];
+         tensor<fp32, [2998, 512]> strided_input = squeeze(axes = strided_input_axes_0, x = var_90)[name = tensor<string, []>("strided_input")];
+         tensor<fp32, [512, 512]> cos_0 = const()[name = tensor<string, []>("cos_0"), val = tensor<fp32, [512, 512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1728)))];
+         tensor<fp32, [512, 512]> sin_0 = const()[name = tensor<string, []>("sin_0"), val = tensor<fp32, [512, 512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1050368)))];
+         tensor<bool, []> matmul_1_transpose_x_1 = const()[name = tensor<string, []>("matmul_1_transpose_x_1"), val = tensor<bool, []>(false)];
+         tensor<bool, []> matmul_1_transpose_y_1 = const()[name = tensor<string, []>("matmul_1_transpose_y_1"), val = tensor<bool, []>(true)];
+         tensor<fp32, [512, 2998]> matmul_1 = matmul(transpose_x = matmul_1_transpose_x_1, transpose_y = matmul_1_transpose_y_1, x = cos_0, y = strided_input)[name = tensor<string, []>("matmul_1")];
+         tensor<bool, []> matmul_3_transpose_x_1 = const()[name = tensor<string, []>("matmul_3_transpose_x_1"), val = tensor<bool, []>(false)];
+         tensor<bool, []> matmul_3_transpose_y_1 = const()[name = tensor<string, []>("matmul_3_transpose_y_1"), val = tensor<bool, []>(true)];
+         tensor<fp32, [512, 2998]> matmul_3 = matmul(transpose_x = matmul_3_transpose_x_1, transpose_y = matmul_3_transpose_y_1, x = sin_0, y = strided_input)[name = tensor<string, []>("matmul_3")];
+         tensor<fp32, []> mul_1_y_0 = const()[name = tensor<string, []>("mul_1_y_0"), val = tensor<fp32, []>(-0x1p+0)];
+         tensor<fp32, [512, 2998]> mul_1 = mul(x = matmul_3, y = mul_1_y_0)[name = tensor<string, []>("mul_1")];
+         tensor<int32, [2]> transpose_3_perm_0 = const()[name = tensor<string, []>("transpose_3_perm_0"), val = tensor<int32, [2]>([-1, 0])];
+         tensor<int32, [2]> transpose_4_perm_0 = const()[name = tensor<string, []>("transpose_4_perm_0"), val = tensor<int32, [2]>([-1, 0])];
+         tensor<int32, [257]> range_1d_2 = const()[name = tensor<string, []>("range_1d_2"), val = tensor<int32, [257]>([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255, 256])];
+         tensor<int32, []> gather_0_axis_0 = const()[name = tensor<string, []>("gather_0_axis_0"), val = tensor<int32, []>(-1)];
+         tensor<int32, []> gather_0_batch_dims_0 = const()[name = tensor<string, []>("gather_0_batch_dims_0"), val = tensor<int32, []>(0)];
+         tensor<fp32, [2998, 512]> transpose_3 = transpose(perm = transpose_3_perm_0, x = matmul_1)[name = tensor<string, []>("transpose_6")];
+         tensor<fp32, [2998, 257]> gather_0 = gather(axis = gather_0_axis_0, batch_dims = gather_0_batch_dims_0, indices = range_1d_2, x = transpose_3)[name = tensor<string, []>("gather_0")];
+         tensor<int32, []> gather_1_axis_0 = const()[name = tensor<string, []>("gather_1_axis_0"), val = tensor<int32, []>(-1)];
+         tensor<int32, []> gather_1_batch_dims_0 = const()[name = tensor<string, []>("gather_1_batch_dims_0"), val = tensor<int32, []>(0)];
+         tensor<fp32, [2998, 512]> transpose_4 = transpose(perm = transpose_4_perm_0, x = mul_1)[name = tensor<string, []>("transpose_5")];
+         tensor<fp32, [2998, 257]> gather_1 = gather(axis = gather_1_axis_0, batch_dims = gather_1_batch_dims_0, indices = range_1d_2, x = transpose_4)[name = tensor<string, []>("gather_1")];
+         tensor<fp32, [2998, 257]> square_0 = square(x = gather_0)[name = tensor<string, []>("square_0")];
+         tensor<fp32, [2998, 257]> square_1 = square(x = gather_1)[name = tensor<string, []>("square_1")];
+         tensor<fp32, [2998, 257]> add_1 = add(x = square_0, y = square_1)[name = tensor<string, []>("add_1")];
+         tensor<fp32, [2998, 257]> spectrum = identity(x = add_1)[name = tensor<string, []>("spectrum")];
+         tensor<fp32, [80, 257]> mel_energies_3 = const()[name = tensor<string, []>("mel_energies_3"), val = tensor<fp32, [80, 257]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(2099008)))];
+         tensor<fp32, [80]> mel_energies_bias_0 = const()[name = tensor<string, []>("mel_energies_bias_0"), val = tensor<fp32, [80]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(2181312)))];
+         tensor<fp32, [2998, 80]> mel_energies = linear(bias = mel_energies_bias_0, weight = mel_energies_3, x = spectrum)[name = tensor<string, []>("mel_energies")];
+         tensor<fp32, []> const_10 = const()[name = tensor<string, []>("const_10"), val = tensor<fp32, []>(0x1p-23)];
+         tensor<fp32, [2998, 80]> var_186 = maximum(x = mel_energies, y = const_10)[name = tensor<string, []>("op_186")];
+         tensor<fp32, []> filter_banks_epsilon_0 = const()[name = tensor<string, []>("filter_banks_epsilon_0"), val = tensor<fp32, []>(0x1p-149)];
+         tensor<fp32, [2998, 80]> filter_banks = log(epsilon = filter_banks_epsilon_0, x = var_186)[name = tensor<string, []>("filter_banks")];
+         tensor<int32, [1]> var_192_axes_0 = const()[name = tensor<string, []>("op_192_axes_0"), val = tensor<int32, [1]>([0])];
+         tensor<bool, []> var_192_keep_dims_0 = const()[name = tensor<string, []>("op_192_keep_dims_0"), val = tensor<bool, []>(true)];
+         tensor<fp32, [1, 80]> var_192 = reduce_mean(axes = var_192_axes_0, keep_dims = var_192_keep_dims_0, x = filter_banks)[name = tensor<string, []>("op_192")];
+         tensor<fp32, [2998, 80]> var_194 = sub(x = filter_banks, y = var_192)[name = tensor<string, []>("op_194")];
+         tensor<int32, [1]> obj_axes_0 = const()[name = tensor<string, []>("obj_axes_0"), val = tensor<int32, [1]>([0])];
+         tensor<fp32, [1, 2998, 80]> preprocessor_output_1_type_fp32 = expand_dims(axes = obj_axes_0, x = var_194)[name = tensor<string, []>("obj")];
+         tensor<string, []> cast_9_dtype_0 = const()[name = tensor<string, []>("cast_9_dtype_0"), val = tensor<string, []>("fp16")];
+         tensor<fp16, [1, 2998, 80]> preprocessor_output_1 = cast(dtype = cast_9_dtype_0, x = preprocessor_output_1_type_fp32)[name = tensor<string, []>("cast_10")];
+     } -> (preprocessor_output_1);
+ }
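
The MIL program above is a Kaldi-style log-mel fbank front end: scale the waveform by 2^15 (`0x1p+15`, the 16-bit convention), frame into 400-sample windows with a 160-sample hop, remove the per-frame mean, apply preemphasis with coefficient ≈ 0.97 (`0x1.f0a3d8p-1`), multiply by a stored window, zero-pad 400 → 512, take a DFT via cos/sin matmuls, form the 257-bin power spectrum, project onto 80 mel filters, take the log with a floor, and subtract the per-utterance mean. A hedged torchaudio reference, approximate because the window type and mel filter construction live in the weight blobs rather than in readable constants:

```python
# Sketch: torchaudio reference for the fbank pipeline encoded in the MIL above.
# Approximate: only the constants visible in the MIL are matched (2^15 scaling,
# 25 ms / 10 ms framing, preemphasis ~0.97, 512-pt FFT, 80 mel bins,
# per-frame DC removal, final per-utterance mean subtraction).
import torch
import torchaudio.compliance.kaldi as kaldi

waveform = torch.zeros(1, 480000)      # 30 s at 16 kHz, values in [-1, 1]
feats = kaldi.fbank(
    waveform * 32768.0,                # the MIL multiplies by 0x1p+15 first
    sample_frequency=16000.0,
    frame_length=25.0,                 # 400 samples
    frame_shift=10.0,                  # 160 samples
    num_mel_bins=80,
    dither=0.0,
    preemphasis_coefficient=0.97,      # 0x1.f0a3d8p-1 in the MIL
    remove_dc_offset=True,             # per-frame mean removal (row_means)
    subtract_mean=True,                # final reduce_mean/sub over frames
)
print(feats.shape)                     # torch.Size([2998, 80])
```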
speaker_embedder/pyannote-v3-pro/W16A16/SpeakerEmbedderPreprocessor.mlmodelc/weights/weight.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:5f2c284bd22f1f7ab76901c1c6e57f82d4ebbf057fa0b924aad057f124f77a89
+ size 2181696
speaker_embedder/pyannote-v3-pro/W8A16/LICENSE_NOTICE.txt ADDED
@@ -0,0 +1,7 @@
+ Argmax proprietary and confidential. Under NDA.
+
+ Copyright 2025 Argmax, Inc. All rights reserved.
+
+ Unauthorized access, copying, use, distribution, and or commercialization of this file, via any medium or means is strictly prohibited.
+
+ Please contact Argmax for licensing information at [email protected].
speaker_embedder/pyannote-v3-pro/W8A16/README.txt ADDED
@@ -0,0 +1,6 @@
+ # License
+
+ Original model weights: https://github.com/wenet-e2e/wespeaker/blob/master/docs/pretrained.md#model-license
+ Argmax-optimized model asset (Assets with `.mlmodelc` extension): https://huggingface.co/argmaxinc/speakerkit-pro/blob/main/LICENSE_NOTICE.txt
+
+ Please contact [email protected] for licensing SpeakerKit Pro assets
speaker_embedder/pyannote-v3-pro/W8A16/SpeakerEmbedder.mlmodelc/analytics/coremldata.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:642e75621f11e461372ebf68c2861b4d56b48b89027295cfac05b518f88fc8f7
+ size 243
speaker_embedder/pyannote-v3-pro/W8A16/SpeakerEmbedder.mlmodelc/coremldata.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:1bbb69d66cdb7f69a6c9d4dc1ff9b114d6097bb69581e12a706cf16e27ce10be
+ size 370
speaker_embedder/pyannote-v3-pro/W8A16/SpeakerEmbedder.mlmodelc/metadata.json ADDED
@@ -0,0 +1,87 @@
+ [
+   {
+     "metadataOutputVersion" : "3.0",
+     "storagePrecision" : "Mixed (Float16, Palettized (8 bits))",
+     "outputSchema" : [
+       {
+         "hasShapeFlexibility" : "0",
+         "isOptional" : "0",
+         "dataType" : "Float16",
+         "formattedType" : "MultiArray (Float16 1 × 64 × 256)",
+         "shortDescription" : "",
+         "shape" : "[1, 64, 256]",
+         "name" : "speaker_embeddings",
+         "type" : "MultiArray"
+       }
+     ],
+     "modelParameters" : [
+
+     ],
+     "specificationVersion" : 7,
+     "mlProgramOperationTypeHistogram" : {
+       "Concat" : 64,
+       "Ios16.mul" : 256,
+       "SliceByIndex" : 64,
+       "Ios16.constexprLutToDense" : 35,
+       "Transpose" : 1,
+       "Ios16.sub" : 128,
+       "Ios16.sqrt" : 64,
+       "Stack" : 1,
+       "UpsampleNearestNeighbor" : 1,
+       "Ios16.conv" : 36,
+       "Ios16.add" : 144,
+       "Squeeze" : 1,
+       "Ios16.relu" : 33,
+       "Ios16.realDiv" : 192,
+       "Ios16.reduceSum" : 256,
+       "ExpandDims" : 130,
+       "Ios16.linear" : 1,
+       "Ios16.reshape" : 1
+     },
+     "computePrecision" : "Mixed (Float16, Float32, Int32)",
+     "isUpdatable" : "0",
+     "stateSchema" : [
+
+     ],
+     "availability" : {
+       "macOS" : "13.0",
+       "tvOS" : "16.0",
+       "visionOS" : "1.0",
+       "watchOS" : "9.0",
+       "iOS" : "16.0",
+       "macCatalyst" : "16.0"
+     },
+     "modelType" : {
+       "name" : "MLModelType_mlProgram"
+     },
+     "userDefinedMetadata" : {
+       "com.github.apple.coremltools.source_dialect" : "TorchScript",
+       "com.github.apple.coremltools.source" : "torch==2.6.0",
+       "com.github.apple.coremltools.version" : "8.2"
+     },
+     "inputSchema" : [
+       {
+         "hasShapeFlexibility" : "0",
+         "isOptional" : "0",
+         "dataType" : "Float16",
+         "formattedType" : "MultiArray (Float16 1 × 998 × 80)",
+         "shortDescription" : "",
+         "shape" : "[1, 998, 80]",
+         "name" : "preprocessor_output_1",
+         "type" : "MultiArray"
+       },
+       {
+         "hasShapeFlexibility" : "0",
+         "isOptional" : "0",
+         "dataType" : "Float16",
+         "formattedType" : "MultiArray (Float16 1 × 64 × 589)",
+         "shortDescription" : "",
+         "shape" : "[1, 64, 589]",
+         "name" : "speaker_masks",
+         "type" : "MultiArray"
+       }
+     ],
+     "generatedClassName" : "SpeakerEmbedding_8_bit",
+     "method" : "predict"
+   }
+ ]
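
This W8A16 variant has the same I/O schema as the W16A16 one above; the difference is the storage precision ("Mixed (Float16, Palettized (8 bits))") and the 35 `Ios16.constexprLutToDense` ops that expand 8-bit LUT-compressed weights at load time, which roughly halves weight.bin (13264960 → 6661888 bytes per the LFS pointers). Argmax's actual conversion pipeline is not part of this commit; a hypothetical sketch of producing such a variant with coremltools (the metadata reports coremltools 8.2), with a made-up source path:

```python
# Sketch: how an 8-bit palettized variant like this might be produced.
# Hypothetical input path; not Argmax's actual pipeline.
import coremltools as ct
from coremltools.optimize.coreml import (
    OpPalettizerConfig, OptimizationConfig, palettize_weights,
)

mlmodel = ct.models.MLModel("SpeakerEmbedder.mlpackage")  # hypothetical fp16 source
config = OptimizationConfig(
    global_config=OpPalettizerConfig(mode="kmeans", nbits=8)  # 8-bit LUT per op
)
compressed = palettize_weights(mlmodel, config)
compressed.save("SpeakerEmbedder_8bit.mlpackage")
```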
speaker_embedder/pyannote-v3-pro/W8A16/SpeakerEmbedder.mlmodelc/model.mil ADDED
The diff for this file is too large to render. See raw diff
 
speaker_embedder/pyannote-v3-pro/W8A16/SpeakerEmbedder.mlmodelc/weights/weight.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:14c666c28766dd780a3afc3dbb4b44078e418e16f44d97780b9f8555407213a1
+ size 6661888
speaker_embedder/pyannote-v3-pro/W8A16/SpeakerEmbedderPreprocessor.mlmodelc/analytics/coremldata.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:1383750df1af99af002f0c3403bcf19a18c3d749706eb3498d34b0fe01abf2fc
+ size 243
speaker_embedder/pyannote-v3-pro/W8A16/SpeakerEmbedderPreprocessor.mlmodelc/coremldata.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:dc230803421bf4fe14f843ea5b5fa0035487fd19cdd69ed670d72560b6a44586
+ size 330
speaker_embedder/pyannote-v3-pro/W8A16/SpeakerEmbedderPreprocessor.mlmodelc/metadata.json ADDED
@@ -0,0 +1,77 @@
+ [
+   {
+     "metadataOutputVersion" : "3.0",
+     "storagePrecision" : "Float32",
+     "outputSchema" : [
+       {
+         "hasShapeFlexibility" : "0",
+         "isOptional" : "0",
+         "dataType" : "Float16",
+         "formattedType" : "MultiArray (Float16 1 × 2998 × 80)",
+         "shortDescription" : "",
+         "shape" : "[1, 2998, 80]",
+         "name" : "preprocessor_output_1",
+         "type" : "MultiArray"
+       }
+     ],
+     "modelParameters" : [
+
+     ],
+     "specificationVersion" : 7,
+     "mlProgramOperationTypeHistogram" : {
+       "Ios16.cast" : 2,
+       "Ios16.mul" : 4,
+       "SliceByIndex" : 2,
+       "Transpose" : 2,
+       "SlidingWindows" : 1,
+       "Ios16.sub" : 3,
+       "Ios16.log" : 1,
+       "Ios16.reduceMean" : 2,
+       "Ios16.square" : 2,
+       "Squeeze" : 2,
+       "Ios16.matmul" : 2,
+       "Ios16.add" : 1,
+       "Ios16.linear" : 1,
+       "ExpandDims" : 4,
+       "Ios16.gather" : 2,
+       "Ios16.maximum" : 1,
+       "Identity" : 1,
+       "Pad" : 2
+     },
+     "computePrecision" : "Mixed (Float16, Float32, Int32)",
+     "isUpdatable" : "0",
+     "stateSchema" : [
+
+     ],
+     "availability" : {
+       "macOS" : "13.0",
+       "tvOS" : "16.0",
+       "visionOS" : "1.0",
+       "watchOS" : "9.0",
+       "iOS" : "16.0",
+       "macCatalyst" : "16.0"
+     },
+     "modelType" : {
+       "name" : "MLModelType_mlProgram"
+     },
+     "userDefinedMetadata" : {
+       "com.github.apple.coremltools.source_dialect" : "TorchScript",
+       "com.github.apple.coremltools.source" : "torch==2.6.0",
+       "com.github.apple.coremltools.version" : "8.2"
+     },
+     "inputSchema" : [
+       {
+         "hasShapeFlexibility" : "0",
+         "isOptional" : "0",
+         "dataType" : "Float16",
+         "formattedType" : "MultiArray (Float16 1 × 480000)",
+         "shortDescription" : "",
+         "shape" : "[1, 480000]",
+         "name" : "waveforms",
+         "type" : "MultiArray"
+       }
+     ],
+     "generatedClassName" : "SpeakerEmbeddingPreprocessor",
+     "method" : "predict"
+   }
+ ]
speaker_embedder/pyannote-v3-pro/W8A16/SpeakerEmbedderPreprocessor.mlmodelc/model.mil ADDED
@@ -0,0 +1,90 @@
+ program(1.0)
+ [buildInfo = dict<tensor<string, []>, tensor<string, []>>({{"coremlc-component-MIL", "3402.3.2"}, {"coremlc-version", "3402.4.1"}, {"coremltools-component-torch", "2.6.0"}, {"coremltools-source-dialect", "TorchScript"}, {"coremltools-version", "8.2"}})]
+ {
+     func main<ios16>(tensor<fp16, [1, 480000]> waveforms) {
+         tensor<string, []> cast_0_dtype_0 = const()[name = tensor<string, []>("cast_0_dtype_0"), val = tensor<string, []>("fp32")];
+         tensor<fp32, []> var_2_promoted = const()[name = tensor<string, []>("op_2_promoted"), val = tensor<fp32, []>(0x1p+15)];
+         tensor<fp32, [1, 480000]> cast_0 = cast(dtype = cast_0_dtype_0, x = waveforms)[name = tensor<string, []>("cast_11")];
+         tensor<fp32, [1, 480000]> waveform_1 = mul(x = cast_0, y = var_2_promoted)[name = tensor<string, []>("waveform_1")];
+         tensor<int32, [2]> var_6_begin_0 = const()[name = tensor<string, []>("op_6_begin_0"), val = tensor<int32, [2]>([0, 0])];
+         tensor<int32, [2]> var_6_end_0 = const()[name = tensor<string, []>("op_6_end_0"), val = tensor<int32, [2]>([1, 480000])];
+         tensor<bool, [2]> var_6_end_mask_0 = const()[name = tensor<string, []>("op_6_end_mask_0"), val = tensor<bool, [2]>([false, true])];
+         tensor<bool, [2]> var_6_squeeze_mask_0 = const()[name = tensor<string, []>("op_6_squeeze_mask_0"), val = tensor<bool, [2]>([true, false])];
+         tensor<fp32, [480000]> var_6 = slice_by_index(begin = var_6_begin_0, end = var_6_end_0, end_mask = var_6_end_mask_0, squeeze_mask = var_6_squeeze_mask_0, x = waveform_1)[name = tensor<string, []>("op_6")];
+         tensor<int32, []> sliding_windows_0_axis_0 = const()[name = tensor<string, []>("sliding_windows_0_axis_0"), val = tensor<int32, []>(0)];
+         tensor<int32, []> sliding_windows_0_size_0 = const()[name = tensor<string, []>("sliding_windows_0_size_0"), val = tensor<int32, []>(400)];
+         tensor<int32, []> sliding_windows_0_stride_0 = const()[name = tensor<string, []>("sliding_windows_0_stride_0"), val = tensor<int32, []>(160)];
+         tensor<fp32, [2998, 400]> sliding_windows_0 = sliding_windows(axis = sliding_windows_0_axis_0, size = sliding_windows_0_size_0, stride = sliding_windows_0_stride_0, x = var_6)[name = tensor<string, []>("sliding_windows_0")];
+         tensor<int32, [1]> var_42_axes_0 = const()[name = tensor<string, []>("op_42_axes_0"), val = tensor<int32, [1]>([1])];
+         tensor<bool, []> var_42_keep_dims_0 = const()[name = tensor<string, []>("op_42_keep_dims_0"), val = tensor<bool, []>(false)];
+         tensor<fp32, [2998]> var_42 = reduce_mean(axes = var_42_axes_0, keep_dims = var_42_keep_dims_0, x = sliding_windows_0)[name = tensor<string, []>("op_42")];
+         tensor<int32, [1]> row_means_axes_0 = const()[name = tensor<string, []>("row_means_axes_0"), val = tensor<int32, [1]>([1])];
+         tensor<fp32, [2998, 1]> row_means = expand_dims(axes = row_means_axes_0, x = var_42)[name = tensor<string, []>("row_means")];
+         tensor<fp32, [2998, 400]> strided_input_3 = sub(x = sliding_windows_0, y = row_means)[name = tensor<string, []>("strided_input_3")];
+         tensor<int32, [1]> input_1_axes_0 = const()[name = tensor<string, []>("input_1_axes_0"), val = tensor<int32, [1]>([0])];
+         tensor<fp32, [1, 2998, 400]> input_1 = expand_dims(axes = input_1_axes_0, x = strided_input_3)[name = tensor<string, []>("input_1")];
+         tensor<fp32, []> const_2 = const()[name = tensor<string, []>("const_2"), val = tensor<fp32, []>(0x0p+0)];
+         tensor<int32, [6]> var_54_pad_0 = const()[name = tensor<string, []>("op_54_pad_0"), val = tensor<int32, [6]>([0, 0, 0, 0, 1, 0])];
+         tensor<string, []> var_54_mode_0 = const()[name = tensor<string, []>("op_54_mode_0"), val = tensor<string, []>("replicate")];
+         tensor<fp32, [1, 2998, 401]> var_54 = pad(constant_val = const_2, mode = var_54_mode_0, pad = var_54_pad_0, x = input_1)[name = tensor<string, []>("op_54")];
+         tensor<int32, [1]> offset_strided_input_axes_0 = const()[name = tensor<string, []>("offset_strided_input_axes_0"), val = tensor<int32, [1]>([0])];
+         tensor<fp32, [2998, 401]> offset_strided_input = squeeze(axes = offset_strided_input_axes_0, x = var_54)[name = tensor<string, []>("offset_strided_input")];
+         tensor<int32, [2]> var_66_begin_0 = const()[name = tensor<string, []>("op_66_begin_0"), val = tensor<int32, [2]>([0, 0])];
+         tensor<int32, [2]> var_66_end_0 = const()[name = tensor<string, []>("op_66_end_0"), val = tensor<int32, [2]>([2998, 400])];
+         tensor<bool, [2]> var_66_end_mask_0 = const()[name = tensor<string, []>("op_66_end_mask_0"), val = tensor<bool, [2]>([true, false])];
+         tensor<fp32, [2998, 400]> var_66 = slice_by_index(begin = var_66_begin_0, end = var_66_end_0, end_mask = var_66_end_mask_0, x = offset_strided_input)[name = tensor<string, []>("op_66")];
+         tensor<fp32, []> var_67 = const()[name = tensor<string, []>("op_67"), val = tensor<fp32, []>(0x1.f0a3d8p-1)];
+         tensor<fp32, [2998, 400]> var_68 = mul(x = var_66, y = var_67)[name = tensor<string, []>("op_68")];
+         tensor<fp32, [2998, 400]> strided_input_5 = sub(x = strided_input_3, y = var_68)[name = tensor<string, []>("strided_input_5")];
+         tensor<fp32, [1, 400]> window_function = const()[name = tensor<string, []>("window_function"), val = tensor<fp32, [1, 400]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(64)))];
+         tensor<fp32, [2998, 400]> strided_input_7 = mul(x = strided_input_5, y = window_function)[name = tensor<string, []>("strided_input_7")];
+         tensor<int32, [1]> input_3_axes_0 = const()[name = tensor<string, []>("input_3_axes_0"), val = tensor<int32, [1]>([0])];
+         tensor<fp32, [1, 2998, 400]> input_3 = expand_dims(axes = input_3_axes_0, x = strided_input_7)[name = tensor<string, []>("input_3")];
+         tensor<fp32, []> const_3 = const()[name = tensor<string, []>("const_3"), val = tensor<fp32, []>(0x0p+0)];
+         tensor<int32, [6]> var_90_pad_0 = const()[name = tensor<string, []>("op_90_pad_0"), val = tensor<int32, [6]>([0, 0, 0, 0, 0, 112])];
+         tensor<string, []> var_90_mode_0 = const()[name = tensor<string, []>("op_90_mode_0"), val = tensor<string, []>("constant")];
+         tensor<fp32, [1, 2998, 512]> var_90 = pad(constant_val = const_3, mode = var_90_mode_0, pad = var_90_pad_0, x = input_3)[name = tensor<string, []>("op_90")];
+         tensor<int32, [1]> strided_input_axes_0 = const()[name = tensor<string, []>("strided_input_axes_0"), val = tensor<int32, [1]>([0])];
+         tensor<fp32, [2998, 512]> strided_input = squeeze(axes = strided_input_axes_0, x = var_90)[name = tensor<string, []>("strided_input")];
+         tensor<fp32, [512, 512]> cos_0 = const()[name = tensor<string, []>("cos_0"), val = tensor<fp32, [512, 512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1728)))];
+         tensor<fp32, [512, 512]> sin_0 = const()[name = tensor<string, []>("sin_0"), val = tensor<fp32, [512, 512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1050368)))];
+         tensor<bool, []> matmul_1_transpose_x_1 = const()[name = tensor<string, []>("matmul_1_transpose_x_1"), val = tensor<bool, []>(false)];
+         tensor<bool, []> matmul_1_transpose_y_1 = const()[name = tensor<string, []>("matmul_1_transpose_y_1"), val = tensor<bool, []>(true)];
+         tensor<fp32, [512, 2998]> matmul_1 = matmul(transpose_x = matmul_1_transpose_x_1, transpose_y = matmul_1_transpose_y_1, x = cos_0, y = strided_input)[name = tensor<string, []>("matmul_1")];
+         tensor<bool, []> matmul_3_transpose_x_1 = const()[name = tensor<string, []>("matmul_3_transpose_x_1"), val = tensor<bool, []>(false)];
+         tensor<bool, []> matmul_3_transpose_y_1 = const()[name = tensor<string, []>("matmul_3_transpose_y_1"), val = tensor<bool, []>(true)];
+         tensor<fp32, [512, 2998]> matmul_3 = matmul(transpose_x = matmul_3_transpose_x_1, transpose_y = matmul_3_transpose_y_1, x = sin_0, y = strided_input)[name = tensor<string, []>("matmul_3")];
+         tensor<fp32, []> mul_1_y_0 = const()[name = tensor<string, []>("mul_1_y_0"), val = tensor<fp32, []>(-0x1p+0)];
+         tensor<fp32, [512, 2998]> mul_1 = mul(x = matmul_3, y = mul_1_y_0)[name = tensor<string, []>("mul_1")];
+         tensor<int32, [2]> transpose_3_perm_0 = const()[name = tensor<string, []>("transpose_3_perm_0"), val = tensor<int32, [2]>([-1, 0])];
+         tensor<int32, [2]> transpose_4_perm_0 = const()[name = tensor<string, []>("transpose_4_perm_0"), val = tensor<int32, [2]>([-1, 0])];
+         tensor<int32, [257]> range_1d_2 = const()[name = tensor<string, []>("range_1d_2"), val = tensor<int32, [257]>([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255, 256])];
+         tensor<int32, []> gather_0_axis_0 = const()[name = tensor<string, []>("gather_0_axis_0"), val = tensor<int32, []>(-1)];
+         tensor<int32, []> gather_0_batch_dims_0 = const()[name = tensor<string, []>("gather_0_batch_dims_0"), val = tensor<int32, []>(0)];
+         tensor<fp32, [2998, 512]> transpose_3 = transpose(perm = transpose_3_perm_0, x = matmul_1)[name = tensor<string, []>("transpose_6")];
+         tensor<fp32, [2998, 257]> gather_0 = gather(axis = gather_0_axis_0, batch_dims = gather_0_batch_dims_0, indices = range_1d_2, x = transpose_3)[name = tensor<string, []>("gather_0")];
+         tensor<int32, []> gather_1_axis_0 = const()[name = tensor<string, []>("gather_1_axis_0"), val = tensor<int32, []>(-1)];
+         tensor<int32, []> gather_1_batch_dims_0 = const()[name = tensor<string, []>("gather_1_batch_dims_0"), val = tensor<int32, []>(0)];
+         tensor<fp32, [2998, 512]> transpose_4 = transpose(perm = transpose_4_perm_0, x = mul_1)[name = tensor<string, []>("transpose_5")];
+         tensor<fp32, [2998, 257]> gather_1 = gather(axis = gather_1_axis_0, batch_dims = gather_1_batch_dims_0, indices = range_1d_2, x = transpose_4)[name = tensor<string, []>("gather_1")];
+         tensor<fp32, [2998, 257]> square_0 = square(x = gather_0)[name = tensor<string, []>("square_0")];
+         tensor<fp32, [2998, 257]> square_1 = square(x = gather_1)[name = tensor<string, []>("square_1")];
+         tensor<fp32, [2998, 257]> add_1 = add(x = square_0, y = square_1)[name = tensor<string, []>("add_1")];
+         tensor<fp32, [2998, 257]> spectrum = identity(x = add_1)[name = tensor<string, []>("spectrum")];
+         tensor<fp32, [80, 257]> mel_energies_3 = const()[name = tensor<string, []>("mel_energies_3"), val = tensor<fp32, [80, 257]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(2099008)))];
+         tensor<fp32, [80]> mel_energies_bias_0 = const()[name = tensor<string, []>("mel_energies_bias_0"), val = tensor<fp32, [80]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(2181312)))];
+         tensor<fp32, [2998, 80]> mel_energies = linear(bias = mel_energies_bias_0, weight = mel_energies_3, x = spectrum)[name = tensor<string, []>("mel_energies")];
+         tensor<fp32, []> const_10 = const()[name = tensor<string, []>("const_10"), val = tensor<fp32, []>(0x1p-23)];
+         tensor<fp32, [2998, 80]> var_186 = maximum(x = mel_energies, y = const_10)[name = tensor<string, []>("op_186")];
+         tensor<fp32, []> filter_banks_epsilon_0 = const()[name = tensor<string, []>("filter_banks_epsilon_0"), val = tensor<fp32, []>(0x1p-149)];
+         tensor<fp32, [2998, 80]> filter_banks = log(epsilon = filter_banks_epsilon_0, x = var_186)[name = tensor<string, []>("filter_banks")];
+         tensor<int32, [1]> var_192_axes_0 = const()[name = tensor<string, []>("op_192_axes_0"), val = tensor<int32, [1]>([0])];
+         tensor<bool, []> var_192_keep_dims_0 = const()[name = tensor<string, []>("op_192_keep_dims_0"), val = tensor<bool, []>(true)];
+         tensor<fp32, [1, 80]> var_192 = reduce_mean(axes = var_192_axes_0, keep_dims = var_192_keep_dims_0, x = filter_banks)[name = tensor<string, []>("op_192")];
+         tensor<fp32, [2998, 80]> var_194 = sub(x = filter_banks, y = var_192)[name = tensor<string, []>("op_194")];
+         tensor<int32, [1]> obj_axes_0 = const()[name = tensor<string, []>("obj_axes_0"), val = tensor<int32, [1]>([0])];
+         tensor<fp32, [1, 2998, 80]> preprocessor_output_1_type_fp32 = expand_dims(axes = obj_axes_0, x = var_194)[name = tensor<string, []>("obj")];
+         tensor<string, []> cast_9_dtype_0 = const()[name = tensor<string, []>("cast_9_dtype_0"), val = tensor<string, []>("fp16")];
+         tensor<fp16, [1, 2998, 80]> preprocessor_output_1 = cast(dtype = cast_9_dtype_0, x = preprocessor_output_1_type_fp32)[name = tensor<string, []>("cast_10")];
+     } -> (preprocessor_output_1);
+ }
speaker_embedder/pyannote-v3-pro/W8A16/SpeakerEmbedderPreprocessor.mlmodelc/weights/weight.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:5f2c284bd22f1f7ab76901c1c6e57f82d4ebbf057fa0b924aad057f124f77a89
+ size 2181696
speaker_segmenter/pyannote-v3-pro/LICENSE_NOTICE.txt ADDED
@@ -0,0 +1,7 @@
+ Argmax proprietary and confidential. Under NDA.
+
+ Copyright 2025 Argmax, Inc. All rights reserved.
+
+ Unauthorized access, copying, use, distribution, and or commercialization of this file, via any medium or means is strictly prohibited.
+
+ Please contact Argmax for licensing information at [email protected].
speaker_segmenter/pyannote-v3-pro/README.txt ADDED
@@ -0,0 +1,6 @@
+ # License
+
+ Original model weights: https://huggingface.co/pyannote/segmentation-3.0/blob/main/LICENSE
+ Argmax-optimized model asset (Assets with `.mlmodelc` extension): https://huggingface.co/argmaxinc/speakerkit-pro/blob/main/LICENSE_NOTICE.txt
+
+ Please contact [email protected] for licensing SpeakerKit Pro assets
speaker_segmenter/pyannote-v3-pro/W32A32/LICENSE_NOTICE.txt ADDED
@@ -0,0 +1,7 @@
+ Argmax proprietary and confidential. Under NDA.
+
+ Copyright 2025 Argmax, Inc. All rights reserved.
+
+ Unauthorized access, copying, use, distribution, and or commercialization of this file, via any medium or means is strictly prohibited.
+
+ Please contact Argmax for licensing information at [email protected].
speaker_segmenter/pyannote-v3-pro/W32A32/README.txt ADDED
@@ -0,0 +1,6 @@
+ # License
+
+ Original model weights: https://huggingface.co/pyannote/segmentation-3.0/blob/main/LICENSE
+ Argmax-optimized model asset (Assets with `.mlmodelc` extension): https://huggingface.co/argmaxinc/speakerkit-pro/blob/main/LICENSE_NOTICE.txt
+
+ Please contact [email protected] for licensing SpeakerKit Pro assets
speaker_segmenter/pyannote-v3-pro/W32A32/SpeakerSegmenter.mlmodelc/analytics/coremldata.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:2f3593627d0e4a99c4537655e8339aa5f786a175f913194781af7c6ef3b969f7
+ size 243
speaker_segmenter/pyannote-v3-pro/W32A32/SpeakerSegmenter.mlmodelc/coremldata.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:4d92ffe80d66daa38293970d33fef64924b89b3f8d8ee71606e1a1e03e19d200
+ size 519
speaker_segmenter/pyannote-v3-pro/W32A32/SpeakerSegmenter.mlmodelc/metadata.json ADDED
@@ -0,0 +1,144 @@
+ [
+   {
+     "metadataOutputVersion" : "3.0",
+     "storagePrecision" : "Float32",
+     "outputSchema" : [
+       {
+         "hasShapeFlexibility" : "0",
+         "isOptional" : "0",
+         "dataType" : "Float16",
+         "formattedType" : "MultiArray (Float16 21 × 589 × 3)",
+         "shortDescription" : "",
+         "shape" : "[21, 589, 3]",
+         "name" : "speaker_probs",
+         "type" : "MultiArray"
+       },
+       {
+         "hasShapeFlexibility" : "0",
+         "isOptional" : "0",
+         "dataType" : "Float16",
+         "formattedType" : "MultiArray (Float16 21 × 589 × 3)",
+         "shortDescription" : "",
+         "shape" : "[21, 589, 3]",
+         "name" : "speaker_ids",
+         "type" : "MultiArray"
+       },
+       {
+         "hasShapeFlexibility" : "0",
+         "isOptional" : "0",
+         "dataType" : "Float16",
+         "formattedType" : "MultiArray (Float16 21 × 3)",
+         "shortDescription" : "",
+         "shape" : "[21, 3]",
+         "name" : "speaker_activity",
+         "type" : "MultiArray"
+       },
+       {
+         "hasShapeFlexibility" : "0",
+         "isOptional" : "0",
+         "dataType" : "Float16",
+         "formattedType" : "MultiArray (Float16 21 × 589)",
+         "shortDescription" : "",
+         "shape" : "[21, 589]",
+         "name" : "overlapped_speaker_activity",
+         "type" : "MultiArray"
+       },
+       {
+         "hasShapeFlexibility" : "0",
+         "isOptional" : "0",
+         "dataType" : "Float16",
+         "formattedType" : "MultiArray (Float16 1767)",
+         "shortDescription" : "",
+         "shape" : "[1767]",
+         "name" : "voice_activity",
+         "type" : "MultiArray"
+       },
+       {
+         "hasShapeFlexibility" : "0",
+         "isOptional" : "0",
+         "dataType" : "Float16",
+         "formattedType" : "MultiArray (Float16 21 × 1 × 160000)",
+         "shortDescription" : "",
+         "shape" : "[21, 1, 160000]",
+         "name" : "sliding_window_waveform",
+         "type" : "MultiArray"
+       }
+     ],
+     "modelParameters" : [
+
+     ],
+     "specificationVersion" : 7,
+     "mlProgramOperationTypeHistogram" : {
+       "Transpose" : 2,
+       "Ios16.maxPool" : 3,
+       "Ios16.exp" : 1,
+       "Ios16.softmax" : 1,
+       "SlidingWindows" : 1,
+       "Ios16.linear" : 5,
+       "Ios16.add" : 40,
+       "Ios16.realDiv" : 1,
+       "Ios16.reduceMax" : 1,
+       "Ios16.reduceSum" : 2,
+       "Ios16.reduceArgmax" : 1,
+       "Ios16.greater" : 1,
+       "Ios16.log" : 1,
+       "ExpandDims" : 1,
+       "Ios16.instanceNorm" : 4,
+       "Ios16.sub" : 1,
+       "Ios16.cast" : 9,
+       "Ios16.conv" : 3,
+       "Ios16.lstm" : 4,
+       "OneHot" : 1,
+       "SliceByIndex" : 61,
+       "Ios16.abs" : 1,
+       "Ios16.scatter" : 42,
+       "Ios16.mul" : 1,
+       "Ios16.leakyRelu" : 5
+     },
+     "computePrecision" : "Mixed (Float16, Float32, Int32)",
+     "isUpdatable" : "0",
+     "stateSchema" : [
+
+     ],
+     "availability" : {
+       "macOS" : "13.0",
+       "tvOS" : "16.0",
+       "visionOS" : "1.0",
+       "watchOS" : "9.0",
+       "iOS" : "16.0",
+       "macCatalyst" : "16.0"
+     },
+     "modelType" : {
+       "name" : "MLModelType_mlProgram"
+     },
+     "userDefinedMetadata" : {
+       "com.github.apple.coremltools.source_dialect" : "TorchScript",
+       "com.github.apple.coremltools.version" : "8.2",
+       "com.github.apple.coremltools.source" : "torch==2.6.0"
+     },
+     "inputSchema" : [
+       {
+         "hasShapeFlexibility" : "0",
+         "isOptional" : "0",
+         "dataType" : "Float16",
+         "formattedType" : "MultiArray (Float16 480000)",
+         "shortDescription" : "",
+         "shape" : "[480000]",
+         "name" : "waveform",
+         "type" : "MultiArray"
+       },
+       {
+         "hasShapeFlexibility" : "0",
+         "isOptional" : "0",
+         "dataType" : "Float16",
+         "formattedType" : "MultiArray (Float16 7)",
+         "shortDescription" : "",
+         "shape" : "[7]",
+         "name" : "input_1",
+         "type" : "MultiArray"
+       }
+     ],
+     "generatedClassName" : "SpeakerSegmenter",
+     "method" : "predict"
+   }
+ ]
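
Per this schema, the segmenter takes a 30 s, 16 kHz waveform (`[480000]`) plus a 7-element `input_1` whose semantics aren't documented here, and appears to slide 21 overlapping 10 s windows internally: `sliding_window_waveform` is `[21, 1, 160000]`, consistent with a 1 s hop ((480000 − 160000) / 16000 + 1 = 21). Each window yields 589 frames of probabilities for up to 3 local speakers, and `voice_activity` has 1767 = 3 × 589 frames, i.e. the same frame rate over the full 30 s. A hedged sketch exercising this contract (zero inputs are placeholders only):

```python
# Sketch: exercise the SpeakerSegmenter I/O contract from the metadata above.
# input_1's semantics aren't documented in this metadata; zeros are placeholders.
import numpy as np
from coremltools.models import CompiledMLModel

segmenter = CompiledMLModel(
    "speaker_segmenter/pyannote-v3-pro/W32A32/SpeakerSegmenter.mlmodelc"
)
out = segmenter.predict({
    "waveform": np.zeros(480000, dtype=np.float16),  # 30 s @ 16 kHz
    "input_1": np.zeros(7, dtype=np.float16),        # placeholder
})
for name in ("speaker_probs", "speaker_activity", "voice_activity"):
    print(name, out[name].shape)
# speaker_probs: (21, 589, 3) -> 21 sliding 10 s windows × 589 frames × 3 speakers
```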
speaker_segmenter/pyannote-v3-pro/W32A32/SpeakerSegmenter.mlmodelc/model.mil ADDED
The diff for this file is too large to render. See raw diff
 
speaker_segmenter/pyannote-v3-pro/W32A32/SpeakerSegmenter.mlmodelc/weights/weight.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:e76cb6e08ccbc6a007135fca00179f5c72ed6b0878e1584b129614388ee6909b
+ size 5990900
speaker_segmenter/pyannote-v3-pro/W8A16/LICENSE_NOTICE.txt ADDED
@@ -0,0 +1,7 @@
+ Argmax proprietary and confidential. Under NDA.
+
+ Copyright 2025 Argmax, Inc. All rights reserved.
+
+ Unauthorized access, copying, use, distribution, and or commercialization of this file, via any medium or means is strictly prohibited.
+
+ Please contact Argmax for licensing information at [email protected].
speaker_segmenter/pyannote-v3-pro/W8A16/README.txt ADDED
@@ -0,0 +1,6 @@
+ # License
+
+ Original model weights: https://huggingface.co/pyannote/segmentation-3.0/blob/main/LICENSE
+ Argmax-optimized model asset (Assets with `.mlmodelc` extension): https://huggingface.co/argmaxinc/speakerkit-pro/blob/main/LICENSE_NOTICE.txt
+
+ Please contact [email protected] for licensing SpeakerKit Pro assets
speaker_segmenter/pyannote-v3-pro/W8A16/SpeakerSegmenter.mlmodelc/analytics/coremldata.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:25923d134e4f141b7e5284d06144f7b5961eb43b3a7027495bb54e322706570b
+ size 243
speaker_segmenter/pyannote-v3-pro/W8A16/SpeakerSegmenter.mlmodelc/coremldata.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:4d92ffe80d66daa38293970d33fef64924b89b3f8d8ee71606e1a1e03e19d200
+ size 519
speaker_segmenter/pyannote-v3-pro/W8A16/SpeakerSegmenter.mlmodelc/metadata.json ADDED
@@ -0,0 +1,145 @@
+ [
+   {
+     "metadataOutputVersion" : "3.0",
+     "storagePrecision" : "Mixed (Float16, Float32, Palettized (8 bits))",
+     "outputSchema" : [
+       {
+         "hasShapeFlexibility" : "0",
+         "isOptional" : "0",
+         "dataType" : "Float16",
+         "formattedType" : "MultiArray (Float16 21 × 589 × 3)",
+         "shortDescription" : "",
+         "shape" : "[21, 589, 3]",
+         "name" : "speaker_probs",
+         "type" : "MultiArray"
+       },
+       {
+         "hasShapeFlexibility" : "0",
+         "isOptional" : "0",
+         "dataType" : "Float16",
+         "formattedType" : "MultiArray (Float16 21 × 589 × 3)",
+         "shortDescription" : "",
+         "shape" : "[21, 589, 3]",
+         "name" : "speaker_ids",
+         "type" : "MultiArray"
+       },
+       {
+         "hasShapeFlexibility" : "0",
+         "isOptional" : "0",
+         "dataType" : "Float16",
+         "formattedType" : "MultiArray (Float16 21 × 3)",
+         "shortDescription" : "",
+         "shape" : "[21, 3]",
+         "name" : "speaker_activity",
+         "type" : "MultiArray"
+       },
+       {
+         "hasShapeFlexibility" : "0",
+         "isOptional" : "0",
+         "dataType" : "Float16",
+         "formattedType" : "MultiArray (Float16 21 × 589)",
+         "shortDescription" : "",
+         "shape" : "[21, 589]",
+         "name" : "overlapped_speaker_activity",
+         "type" : "MultiArray"
+       },
+       {
+         "hasShapeFlexibility" : "0",
+         "isOptional" : "0",
+         "dataType" : "Float16",
+         "formattedType" : "MultiArray (Float16 1767)",
+         "shortDescription" : "",
+         "shape" : "[1767]",
+         "name" : "voice_activity",
+         "type" : "MultiArray"
+       },
+       {
+         "hasShapeFlexibility" : "0",
+         "isOptional" : "0",
+         "dataType" : "Float16",
+         "formattedType" : "MultiArray (Float16 21 × 1 × 160000)",
+         "shortDescription" : "",
+         "shape" : "[21, 1, 160000]",
+         "name" : "sliding_window_waveform",
+         "type" : "MultiArray"
+       }
+     ],
+     "modelParameters" : [
+
+     ],
+     "specificationVersion" : 7,
+     "mlProgramOperationTypeHistogram" : {
+       "Transpose" : 2,
+       "Ios16.maxPool" : 3,
+       "Ios16.exp" : 1,
+       "Ios16.softmax" : 1,
+       "SlidingWindows" : 1,
+       "Ios16.linear" : 5,
+       "Ios16.add" : 40,
+       "Ios16.realDiv" : 1,
+       "Ios16.reduceMax" : 1,
+       "Ios16.reduceSum" : 2,
+       "Ios16.reduceArgmax" : 1,
+       "Ios16.greater" : 1,
+       "Ios16.log" : 1,
+       "ExpandDims" : 1,
+       "Ios16.instanceNorm" : 4,
+       "Ios16.sub" : 1,
+       "Ios16.cast" : 4,
+       "Ios16.conv" : 3,
+       "Ios16.constexprLutToDense" : 22,
+       "OneHot" : 1,
+       "Ios16.abs" : 1,
+       "Ios16.lstm" : 4,
+       "SliceByIndex" : 61,
+       "Ios16.scatter" : 42,
+       "Ios16.mul" : 1,
+       "Ios16.leakyRelu" : 5
+     },
+     "computePrecision" : "Mixed (Float16, Float32, Int32)",
+     "isUpdatable" : "0",
+     "stateSchema" : [
+
+     ],
+     "availability" : {
+       "macOS" : "13.0",
+       "tvOS" : "16.0",
+       "visionOS" : "1.0",
+       "watchOS" : "9.0",
+       "iOS" : "16.0",
+       "macCatalyst" : "16.0"
+     },
+     "modelType" : {
+       "name" : "MLModelType_mlProgram"
+     },
+     "userDefinedMetadata" : {
+       "com.github.apple.coremltools.source_dialect" : "TorchScript",
+       "com.github.apple.coremltools.version" : "8.2",
+       "com.github.apple.coremltools.source" : "torch==2.6.0"
+     },
+     "inputSchema" : [
+       {
+         "hasShapeFlexibility" : "0",
+         "isOptional" : "0",
+         "dataType" : "Float16",
+         "formattedType" : "MultiArray (Float16 480000)",
+         "shortDescription" : "",
+         "shape" : "[480000]",
+         "name" : "waveform",
+         "type" : "MultiArray"
+       },
+       {
+         "hasShapeFlexibility" : "0",
+         "isOptional" : "0",
+         "dataType" : "Float16",
+         "formattedType" : "MultiArray (Float16 7)",
+         "shortDescription" : "",
+         "shape" : "[7]",
+         "name" : "input_1",
+         "type" : "MultiArray"
+       }
+     ],
+     "generatedClassName" : "SpeakerSegmenter_8_bit",
+     "method" : "predict"
+   }
+ ]
speaker_segmenter/pyannote-v3-pro/W8A16/SpeakerSegmenter.mlmodelc/model.mil ADDED
The diff for this file is too large to render. See raw diff
 
speaker_segmenter/pyannote-v3-pro/W8A16/SpeakerSegmenter.mlmodelc/weights/weight.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:18aa76153cdb04fc74b8b3ab438d96d0f297aba084f4ebc7ea2b7dcc611c1838
+ size 1537882