arda-argmax committed on
Commit
5ea9592
·
verified ·
1 Parent(s): 8c02f5e

New pyannote-v3-pro SpeakerSegmenter

Browse files
speaker_segmenter/pyannote-v3-pro/LICENSE_NOTICE.txt ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ Argmax proprietary and confidential. Under NDA.
2
+
3
+ Copyright 2024 Argmax, Inc. All rights reserved.
4
+
5
+ Unauthorized access, copying, use, distribution, and/or commercialization of this file, via any medium or means, is strictly prohibited.
6
+
7
+ Please contact Argmax for licensing information at [email protected].
speaker_segmenter/pyannote-v3-pro/README.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ # License
2
+
3
+ Original model weights: https://huggingface.co/pyannote/segmentation-3.0/blob/main/LICENSE
4
+ Argmax-optimized model asset (Assets with `.mlmodelc` extension): https://huggingface.co/argmaxinc/speakerkit-pro/blob/main/LICENSE_NOTICE.txt
5
+
6
+ Please contact [email protected] for licensing SpeakerKit Pro assets.
speaker_segmenter/pyannote-v3-pro/W32A32/LICENSE_NOTICE.txt ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ Argmax proprietary and confidential. Under NDA.
2
+
3
+ Copyright 2024 Argmax, Inc. All rights reserved.
4
+
5
+ Unauthorized access, copying, use, distribution, and/or commercialization of this file, via any medium or means, is strictly prohibited.
6
+
7
+ Please contact Argmax for licensing information at [email protected].
speaker_segmenter/pyannote-v3-pro/W32A32/README.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ # License
2
+
3
+ Original model weights: https://huggingface.co/pyannote/segmentation-3.0/blob/main/LICENSE
4
+ Argmax-optimized model asset (Assets with `.mlmodelc` extension): https://huggingface.co/argmaxinc/speakerkit-pro/blob/main/LICENSE_NOTICE.txt
5
+
6
+ Please contact [email protected] for licensing SpeakerKit Pro assets.
speaker_segmenter/pyannote-v3-pro/W32A32/SpeakerSegmenter.mlmodelc/analytics/coremldata.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2f3593627d0e4a99c4537655e8339aa5f786a175f913194781af7c6ef3b969f7
3
+ size 243
speaker_segmenter/pyannote-v3-pro/W32A32/SpeakerSegmenter.mlmodelc/coremldata.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4d92ffe80d66daa38293970d33fef64924b89b3f8d8ee71606e1a1e03e19d200
3
+ size 519
speaker_segmenter/pyannote-v3-pro/W32A32/SpeakerSegmenter.mlmodelc/metadata.json ADDED
@@ -0,0 +1,144 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "metadataOutputVersion" : "3.0",
4
+ "storagePrecision" : "Float32",
5
+ "outputSchema" : [
6
+ {
7
+ "hasShapeFlexibility" : "0",
8
+ "isOptional" : "0",
9
+ "dataType" : "Float16",
10
+ "formattedType" : "MultiArray (Float16 21 × 589 × 3)",
11
+ "shortDescription" : "",
12
+ "shape" : "[21, 589, 3]",
13
+ "name" : "speaker_probs",
14
+ "type" : "MultiArray"
15
+ },
16
+ {
17
+ "hasShapeFlexibility" : "0",
18
+ "isOptional" : "0",
19
+ "dataType" : "Float16",
20
+ "formattedType" : "MultiArray (Float16 21 × 589 × 3)",
21
+ "shortDescription" : "",
22
+ "shape" : "[21, 589, 3]",
23
+ "name" : "speaker_ids",
24
+ "type" : "MultiArray"
25
+ },
26
+ {
27
+ "hasShapeFlexibility" : "0",
28
+ "isOptional" : "0",
29
+ "dataType" : "Float16",
30
+ "formattedType" : "MultiArray (Float16 21 × 3)",
31
+ "shortDescription" : "",
32
+ "shape" : "[21, 3]",
33
+ "name" : "speaker_activity",
34
+ "type" : "MultiArray"
35
+ },
36
+ {
37
+ "hasShapeFlexibility" : "0",
38
+ "isOptional" : "0",
39
+ "dataType" : "Float16",
40
+ "formattedType" : "MultiArray (Float16 21 × 589)",
41
+ "shortDescription" : "",
42
+ "shape" : "[21, 589]",
43
+ "name" : "overlapped_speaker_activity",
44
+ "type" : "MultiArray"
45
+ },
46
+ {
47
+ "hasShapeFlexibility" : "0",
48
+ "isOptional" : "0",
49
+ "dataType" : "Float16",
50
+ "formattedType" : "MultiArray (Float16 1767)",
51
+ "shortDescription" : "",
52
+ "shape" : "[1767]",
53
+ "name" : "voice_activity",
54
+ "type" : "MultiArray"
55
+ },
56
+ {
57
+ "hasShapeFlexibility" : "0",
58
+ "isOptional" : "0",
59
+ "dataType" : "Float16",
60
+ "formattedType" : "MultiArray (Float16 21 × 1 × 160000)",
61
+ "shortDescription" : "",
62
+ "shape" : "[21, 1, 160000]",
63
+ "name" : "sliding_window_waveform",
64
+ "type" : "MultiArray"
65
+ }
66
+ ],
67
+ "modelParameters" : [
68
+
69
+ ],
70
+ "specificationVersion" : 7,
71
+ "mlProgramOperationTypeHistogram" : {
72
+ "Transpose" : 2,
73
+ "Ios16.maxPool" : 3,
74
+ "Ios16.exp" : 1,
75
+ "Ios16.softmax" : 1,
76
+ "SlidingWindows" : 1,
77
+ "Ios16.linear" : 5,
78
+ "Ios16.add" : 40,
79
+ "Ios16.realDiv" : 1,
80
+ "Ios16.reduceMax" : 1,
81
+ "Ios16.reduceSum" : 2,
82
+ "Ios16.reduceArgmax" : 1,
83
+ "Ios16.greater" : 1,
84
+ "Ios16.log" : 1,
85
+ "ExpandDims" : 1,
86
+ "Ios16.instanceNorm" : 4,
87
+ "Ios16.sub" : 1,
88
+ "Ios16.cast" : 9,
89
+ "Ios16.conv" : 3,
90
+ "Ios16.lstm" : 4,
91
+ "OneHot" : 1,
92
+ "SliceByIndex" : 61,
93
+ "Ios16.abs" : 1,
94
+ "Ios16.scatter" : 42,
95
+ "Ios16.mul" : 1,
96
+ "Ios16.leakyRelu" : 5
97
+ },
98
+ "computePrecision" : "Mixed (Float16, Float32, Int32)",
99
+ "isUpdatable" : "0",
100
+ "stateSchema" : [
101
+
102
+ ],
103
+ "availability" : {
104
+ "macOS" : "13.0",
105
+ "tvOS" : "16.0",
106
+ "visionOS" : "1.0",
107
+ "watchOS" : "9.0",
108
+ "iOS" : "16.0",
109
+ "macCatalyst" : "16.0"
110
+ },
111
+ "modelType" : {
112
+ "name" : "MLModelType_mlProgram"
113
+ },
114
+ "userDefinedMetadata" : {
115
+ "com.github.apple.coremltools.source_dialect" : "TorchScript",
116
+ "com.github.apple.coremltools.version" : "8.2",
117
+ "com.github.apple.coremltools.source" : "torch==2.6.0"
118
+ },
119
+ "inputSchema" : [
120
+ {
121
+ "hasShapeFlexibility" : "0",
122
+ "isOptional" : "0",
123
+ "dataType" : "Float16",
124
+ "formattedType" : "MultiArray (Float16 480000)",
125
+ "shortDescription" : "",
126
+ "shape" : "[480000]",
127
+ "name" : "waveform",
128
+ "type" : "MultiArray"
129
+ },
130
+ {
131
+ "hasShapeFlexibility" : "0",
132
+ "isOptional" : "0",
133
+ "dataType" : "Float16",
134
+ "formattedType" : "MultiArray (Float16 7)",
135
+ "shortDescription" : "",
136
+ "shape" : "[7]",
137
+ "name" : "input_1",
138
+ "type" : "MultiArray"
139
+ }
140
+ ],
141
+ "generatedClassName" : "SpeakerSegmenter",
142
+ "method" : "predict"
143
+ }
144
+ ]
speaker_segmenter/pyannote-v3-pro/W32A32/SpeakerSegmenter.mlmodelc/model.mil ADDED
The diff for this file is too large to render. See raw diff
 
speaker_segmenter/pyannote-v3-pro/W32A32/SpeakerSegmenter.mlmodelc/weights/weight.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e76cb6e08ccbc6a007135fca00179f5c72ed6b0878e1584b129614388ee6909b
3
+ size 5990900
speaker_segmenter/pyannote-v3-pro/W8A16/LICENSE_NOTICE.txt ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ Argmax proprietary and confidential. Under NDA.
2
+
3
+ Copyright 2024 Argmax, Inc. All rights reserved.
4
+
5
+ Unauthorized access, copying, use, distribution, and/or commercialization of this file, via any medium or means, is strictly prohibited.
6
+
7
+ Please contact Argmax for licensing information at [email protected].
speaker_segmenter/pyannote-v3-pro/W8A16/README.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ # License
2
+
3
+ Original model weights: https://huggingface.co/pyannote/segmentation-3.0/blob/main/LICENSE
4
+ Argmax-optimized model asset (Assets with `.mlmodelc` extension): https://huggingface.co/argmaxinc/speakerkit-pro/blob/main/LICENSE_NOTICE.txt
5
+
6
+ Please contact [email protected] for licensing SpeakerKit Pro assets.
speaker_segmenter/pyannote-v3-pro/W8A16/SpeakerSegmenter.mlmodelc/analytics/coremldata.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:25923d134e4f141b7e5284d06144f7b5961eb43b3a7027495bb54e322706570b
3
+ size 243
speaker_segmenter/pyannote-v3-pro/W8A16/SpeakerSegmenter.mlmodelc/coremldata.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4d92ffe80d66daa38293970d33fef64924b89b3f8d8ee71606e1a1e03e19d200
3
+ size 519
speaker_segmenter/pyannote-v3-pro/W8A16/SpeakerSegmenter.mlmodelc/metadata.json ADDED
@@ -0,0 +1,145 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "metadataOutputVersion" : "3.0",
4
+ "storagePrecision" : "Mixed (Float16, Float32, Palettized (8 bits))",
5
+ "outputSchema" : [
6
+ {
7
+ "hasShapeFlexibility" : "0",
8
+ "isOptional" : "0",
9
+ "dataType" : "Float16",
10
+ "formattedType" : "MultiArray (Float16 21 × 589 × 3)",
11
+ "shortDescription" : "",
12
+ "shape" : "[21, 589, 3]",
13
+ "name" : "speaker_probs",
14
+ "type" : "MultiArray"
15
+ },
16
+ {
17
+ "hasShapeFlexibility" : "0",
18
+ "isOptional" : "0",
19
+ "dataType" : "Float16",
20
+ "formattedType" : "MultiArray (Float16 21 × 589 × 3)",
21
+ "shortDescription" : "",
22
+ "shape" : "[21, 589, 3]",
23
+ "name" : "speaker_ids",
24
+ "type" : "MultiArray"
25
+ },
26
+ {
27
+ "hasShapeFlexibility" : "0",
28
+ "isOptional" : "0",
29
+ "dataType" : "Float16",
30
+ "formattedType" : "MultiArray (Float16 21 × 3)",
31
+ "shortDescription" : "",
32
+ "shape" : "[21, 3]",
33
+ "name" : "speaker_activity",
34
+ "type" : "MultiArray"
35
+ },
36
+ {
37
+ "hasShapeFlexibility" : "0",
38
+ "isOptional" : "0",
39
+ "dataType" : "Float16",
40
+ "formattedType" : "MultiArray (Float16 21 × 589)",
41
+ "shortDescription" : "",
42
+ "shape" : "[21, 589]",
43
+ "name" : "overlapped_speaker_activity",
44
+ "type" : "MultiArray"
45
+ },
46
+ {
47
+ "hasShapeFlexibility" : "0",
48
+ "isOptional" : "0",
49
+ "dataType" : "Float16",
50
+ "formattedType" : "MultiArray (Float16 1767)",
51
+ "shortDescription" : "",
52
+ "shape" : "[1767]",
53
+ "name" : "voice_activity",
54
+ "type" : "MultiArray"
55
+ },
56
+ {
57
+ "hasShapeFlexibility" : "0",
58
+ "isOptional" : "0",
59
+ "dataType" : "Float16",
60
+ "formattedType" : "MultiArray (Float16 21 × 1 × 160000)",
61
+ "shortDescription" : "",
62
+ "shape" : "[21, 1, 160000]",
63
+ "name" : "sliding_window_waveform",
64
+ "type" : "MultiArray"
65
+ }
66
+ ],
67
+ "modelParameters" : [
68
+
69
+ ],
70
+ "specificationVersion" : 7,
71
+ "mlProgramOperationTypeHistogram" : {
72
+ "Transpose" : 2,
73
+ "Ios16.maxPool" : 3,
74
+ "Ios16.exp" : 1,
75
+ "Ios16.softmax" : 1,
76
+ "SlidingWindows" : 1,
77
+ "Ios16.linear" : 5,
78
+ "Ios16.add" : 40,
79
+ "Ios16.realDiv" : 1,
80
+ "Ios16.reduceMax" : 1,
81
+ "Ios16.reduceSum" : 2,
82
+ "Ios16.reduceArgmax" : 1,
83
+ "Ios16.greater" : 1,
84
+ "Ios16.log" : 1,
85
+ "ExpandDims" : 1,
86
+ "Ios16.instanceNorm" : 4,
87
+ "Ios16.sub" : 1,
88
+ "Ios16.cast" : 4,
89
+ "Ios16.conv" : 3,
90
+ "Ios16.constexprLutToDense" : 22,
91
+ "OneHot" : 1,
92
+ "Ios16.abs" : 1,
93
+ "Ios16.lstm" : 4,
94
+ "SliceByIndex" : 61,
95
+ "Ios16.scatter" : 42,
96
+ "Ios16.mul" : 1,
97
+ "Ios16.leakyRelu" : 5
98
+ },
99
+ "computePrecision" : "Mixed (Float16, Float32, Int32)",
100
+ "isUpdatable" : "0",
101
+ "stateSchema" : [
102
+
103
+ ],
104
+ "availability" : {
105
+ "macOS" : "13.0",
106
+ "tvOS" : "16.0",
107
+ "visionOS" : "1.0",
108
+ "watchOS" : "9.0",
109
+ "iOS" : "16.0",
110
+ "macCatalyst" : "16.0"
111
+ },
112
+ "modelType" : {
113
+ "name" : "MLModelType_mlProgram"
114
+ },
115
+ "userDefinedMetadata" : {
116
+ "com.github.apple.coremltools.source_dialect" : "TorchScript",
117
+ "com.github.apple.coremltools.version" : "8.2",
118
+ "com.github.apple.coremltools.source" : "torch==2.6.0"
119
+ },
120
+ "inputSchema" : [
121
+ {
122
+ "hasShapeFlexibility" : "0",
123
+ "isOptional" : "0",
124
+ "dataType" : "Float16",
125
+ "formattedType" : "MultiArray (Float16 480000)",
126
+ "shortDescription" : "",
127
+ "shape" : "[480000]",
128
+ "name" : "waveform",
129
+ "type" : "MultiArray"
130
+ },
131
+ {
132
+ "hasShapeFlexibility" : "0",
133
+ "isOptional" : "0",
134
+ "dataType" : "Float16",
135
+ "formattedType" : "MultiArray (Float16 7)",
136
+ "shortDescription" : "",
137
+ "shape" : "[7]",
138
+ "name" : "input_1",
139
+ "type" : "MultiArray"
140
+ }
141
+ ],
142
+ "generatedClassName" : "SpeakerSegmenter_8_bit",
143
+ "method" : "predict"
144
+ }
145
+ ]
speaker_segmenter/pyannote-v3-pro/W8A16/SpeakerSegmenter.mlmodelc/model.mil ADDED
The diff for this file is too large to render. See raw diff
 
speaker_segmenter/pyannote-v3-pro/W8A16/SpeakerSegmenter.mlmodelc/weights/weight.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:18aa76153cdb04fc74b8b3ab438d96d0f297aba084f4ebc7ea2b7dcc611c1838
3
+ size 1537882