New pyannote-v3-pro SpeakerSegmenter

Browse files

Files changed (16) hide show

speaker_segmenter/pyannote-v3-pro/LICENSE_NOTICE.txt +7 -0
speaker_segmenter/pyannote-v3-pro/README.txt +6 -0
speaker_segmenter/pyannote-v3-pro/W32A32/LICENSE_NOTICE.txt +7 -0
speaker_segmenter/pyannote-v3-pro/W32A32/README.txt +6 -0
speaker_segmenter/pyannote-v3-pro/W32A32/SpeakerSegmenter.mlmodelc/analytics/coremldata.bin +3 -0
speaker_segmenter/pyannote-v3-pro/W32A32/SpeakerSegmenter.mlmodelc/coremldata.bin +3 -0
speaker_segmenter/pyannote-v3-pro/W32A32/SpeakerSegmenter.mlmodelc/metadata.json +144 -0
speaker_segmenter/pyannote-v3-pro/W32A32/SpeakerSegmenter.mlmodelc/model.mil +0 -0
speaker_segmenter/pyannote-v3-pro/W32A32/SpeakerSegmenter.mlmodelc/weights/weight.bin +3 -0
speaker_segmenter/pyannote-v3-pro/W8A16/LICENSE_NOTICE.txt +7 -0
speaker_segmenter/pyannote-v3-pro/W8A16/README.txt +6 -0
speaker_segmenter/pyannote-v3-pro/W8A16/SpeakerSegmenter.mlmodelc/analytics/coremldata.bin +3 -0
speaker_segmenter/pyannote-v3-pro/W8A16/SpeakerSegmenter.mlmodelc/coremldata.bin +3 -0
speaker_segmenter/pyannote-v3-pro/W8A16/SpeakerSegmenter.mlmodelc/metadata.json +145 -0
speaker_segmenter/pyannote-v3-pro/W8A16/SpeakerSegmenter.mlmodelc/model.mil +0 -0
speaker_segmenter/pyannote-v3-pro/W8A16/SpeakerSegmenter.mlmodelc/weights/weight.bin +3 -0

speaker_segmenter/pyannote-v3-pro/LICENSE_NOTICE.txt ADDED Viewed

	@@ -0,0 +1,7 @@

+Argmax proprietary and confidential. Under NDA.
+Copyright 2024 Argmax, Inc. All rights reserved.
+Unauthorized access, copying, use, distribution, and/or commercialization of this file, via any medium or means, is strictly prohibited.
+Please contact Argmax for licensing information at [email protected].

speaker_segmenter/pyannote-v3-pro/README.txt ADDED Viewed

	@@ -0,0 +1,6 @@

+# License
+Original model weights: https://huggingface.co/pyannote/segmentation-3.0/blob/main/LICENSE
+Argmax-optimized model asset (Assets with `.mlmodelc` extension): https://huggingface.co/argmaxinc/speakerkit-pro/blob/main/LICENSE_NOTICE.txt
+Please contact [email protected] for licensing SpeakerKit Pro assets

speaker_segmenter/pyannote-v3-pro/W32A32/LICENSE_NOTICE.txt ADDED Viewed

	@@ -0,0 +1,7 @@

+Argmax proprietary and confidential. Under NDA.
+Copyright 2024 Argmax, Inc. All rights reserved.
+Unauthorized access, copying, use, distribution, and/or commercialization of this file, via any medium or means, is strictly prohibited.
+Please contact Argmax for licensing information at [email protected].

speaker_segmenter/pyannote-v3-pro/W32A32/README.txt ADDED Viewed

	@@ -0,0 +1,6 @@

+# License
+Original model weights: https://huggingface.co/pyannote/segmentation-3.0/blob/main/LICENSE
+Argmax-optimized model asset (Assets with `.mlmodelc` extension): https://huggingface.co/argmaxinc/speakerkit-pro/blob/main/LICENSE_NOTICE.txt
+Please contact [email protected] for licensing SpeakerKit Pro assets

speaker_segmenter/pyannote-v3-pro/W32A32/SpeakerSegmenter.mlmodelc/analytics/coremldata.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:2f3593627d0e4a99c4537655e8339aa5f786a175f913194781af7c6ef3b969f7
+size 243

speaker_segmenter/pyannote-v3-pro/W32A32/SpeakerSegmenter.mlmodelc/coremldata.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:4d92ffe80d66daa38293970d33fef64924b89b3f8d8ee71606e1a1e03e19d200
+size 519

speaker_segmenter/pyannote-v3-pro/W32A32/SpeakerSegmenter.mlmodelc/metadata.json ADDED Viewed

	@@ -0,0 +1,144 @@

+[
+  {
+    "metadataOutputVersion" : "3.0",
+    "storagePrecision" : "Float32",
+    "outputSchema" : [
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float16",
+        "formattedType" : "MultiArray (Float16 21 × 589 × 3)",
+        "shortDescription" : "",
+        "shape" : "[21, 589, 3]",
+        "name" : "speaker_probs",
+        "type" : "MultiArray"
+      },
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float16",
+        "formattedType" : "MultiArray (Float16 21 × 589 × 3)",
+        "shortDescription" : "",
+        "shape" : "[21, 589, 3]",
+        "name" : "speaker_ids",
+        "type" : "MultiArray"
+      },
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float16",
+        "formattedType" : "MultiArray (Float16 21 × 3)",
+        "shortDescription" : "",
+        "shape" : "[21, 3]",
+        "name" : "speaker_activity",
+        "type" : "MultiArray"
+      },
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float16",
+        "formattedType" : "MultiArray (Float16 21 × 589)",
+        "shortDescription" : "",
+        "shape" : "[21, 589]",
+        "name" : "overlapped_speaker_activity",
+        "type" : "MultiArray"
+      },
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float16",
+        "formattedType" : "MultiArray (Float16 1767)",
+        "shortDescription" : "",
+        "shape" : "[1767]",
+        "name" : "voice_activity",
+        "type" : "MultiArray"
+      },
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float16",
+        "formattedType" : "MultiArray (Float16 21 × 1 × 160000)",
+        "shortDescription" : "",
+        "shape" : "[21, 1, 160000]",
+        "name" : "sliding_window_waveform",
+        "type" : "MultiArray"
+      }
+    ],
+    "modelParameters" : [
+    ],
+    "specificationVersion" : 7,
+    "mlProgramOperationTypeHistogram" : {
+      "Transpose" : 2,
+      "Ios16.maxPool" : 3,
+      "Ios16.exp" : 1,
+      "Ios16.softmax" : 1,
+      "SlidingWindows" : 1,
+      "Ios16.linear" : 5,
+      "Ios16.add" : 40,
+      "Ios16.realDiv" : 1,
+      "Ios16.reduceMax" : 1,
+      "Ios16.reduceSum" : 2,
+      "Ios16.reduceArgmax" : 1,
+      "Ios16.greater" : 1,
+      "Ios16.log" : 1,
+      "ExpandDims" : 1,
+      "Ios16.instanceNorm" : 4,
+      "Ios16.sub" : 1,
+      "Ios16.cast" : 9,
+      "Ios16.conv" : 3,
+      "Ios16.lstm" : 4,
+      "OneHot" : 1,
+      "SliceByIndex" : 61,
+      "Ios16.abs" : 1,
+      "Ios16.scatter" : 42,
+      "Ios16.mul" : 1,
+      "Ios16.leakyRelu" : 5
+    },
+    "computePrecision" : "Mixed (Float16, Float32, Int32)",
+    "isUpdatable" : "0",
+    "stateSchema" : [
+    ],
+    "availability" : {
+      "macOS" : "13.0",
+      "tvOS" : "16.0",
+      "visionOS" : "1.0",
+      "watchOS" : "9.0",
+      "iOS" : "16.0",
+      "macCatalyst" : "16.0"
+    },
+    "modelType" : {
+      "name" : "MLModelType_mlProgram"
+    },
+    "userDefinedMetadata" : {
+      "com.github.apple.coremltools.source_dialect" : "TorchScript",
+      "com.github.apple.coremltools.version" : "8.2",
+      "com.github.apple.coremltools.source" : "torch==2.6.0"
+    },
+    "inputSchema" : [
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float16",
+        "formattedType" : "MultiArray (Float16 480000)",
+        "shortDescription" : "",
+        "shape" : "[480000]",
+        "name" : "waveform",
+        "type" : "MultiArray"
+      },
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float16",
+        "formattedType" : "MultiArray (Float16 7)",
+        "shortDescription" : "",
+        "shape" : "[7]",
+        "name" : "input_1",
+        "type" : "MultiArray"
+      }
+    ],
+    "generatedClassName" : "SpeakerSegmenter",
+    "method" : "predict"
+  }
+]

speaker_segmenter/pyannote-v3-pro/W32A32/SpeakerSegmenter.mlmodelc/model.mil ADDED Viewed

The diff for this file is too large to render. See raw diff

speaker_segmenter/pyannote-v3-pro/W32A32/SpeakerSegmenter.mlmodelc/weights/weight.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:e76cb6e08ccbc6a007135fca00179f5c72ed6b0878e1584b129614388ee6909b
+size 5990900

speaker_segmenter/pyannote-v3-pro/W8A16/LICENSE_NOTICE.txt ADDED Viewed

	@@ -0,0 +1,7 @@

+Argmax proprietary and confidential. Under NDA.
+Copyright 2024 Argmax, Inc. All rights reserved.
+Unauthorized access, copying, use, distribution, and/or commercialization of this file, via any medium or means, is strictly prohibited.
+Please contact Argmax for licensing information at [email protected].

speaker_segmenter/pyannote-v3-pro/W8A16/README.txt ADDED Viewed

	@@ -0,0 +1,6 @@

+# License
+Original model weights: https://huggingface.co/pyannote/segmentation-3.0/blob/main/LICENSE
+Argmax-optimized model asset (Assets with `.mlmodelc` extension): https://huggingface.co/argmaxinc/speakerkit-pro/blob/main/LICENSE_NOTICE.txt
+Please contact [email protected] for licensing SpeakerKit Pro assets

speaker_segmenter/pyannote-v3-pro/W8A16/SpeakerSegmenter.mlmodelc/analytics/coremldata.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:25923d134e4f141b7e5284d06144f7b5961eb43b3a7027495bb54e322706570b
+size 243

speaker_segmenter/pyannote-v3-pro/W8A16/SpeakerSegmenter.mlmodelc/coremldata.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:4d92ffe80d66daa38293970d33fef64924b89b3f8d8ee71606e1a1e03e19d200
+size 519

speaker_segmenter/pyannote-v3-pro/W8A16/SpeakerSegmenter.mlmodelc/metadata.json ADDED Viewed

	@@ -0,0 +1,145 @@

+[
+  {
+    "metadataOutputVersion" : "3.0",
+    "storagePrecision" : "Mixed (Float16, Float32, Palettized (8 bits))",
+    "outputSchema" : [
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float16",
+        "formattedType" : "MultiArray (Float16 21 × 589 × 3)",
+        "shortDescription" : "",
+        "shape" : "[21, 589, 3]",
+        "name" : "speaker_probs",
+        "type" : "MultiArray"
+      },
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float16",
+        "formattedType" : "MultiArray (Float16 21 × 589 × 3)",
+        "shortDescription" : "",
+        "shape" : "[21, 589, 3]",
+        "name" : "speaker_ids",
+        "type" : "MultiArray"
+      },
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float16",
+        "formattedType" : "MultiArray (Float16 21 × 3)",
+        "shortDescription" : "",
+        "shape" : "[21, 3]",
+        "name" : "speaker_activity",
+        "type" : "MultiArray"
+      },
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float16",
+        "formattedType" : "MultiArray (Float16 21 × 589)",
+        "shortDescription" : "",
+        "shape" : "[21, 589]",
+        "name" : "overlapped_speaker_activity",
+        "type" : "MultiArray"
+      },
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float16",
+        "formattedType" : "MultiArray (Float16 1767)",
+        "shortDescription" : "",
+        "shape" : "[1767]",
+        "name" : "voice_activity",
+        "type" : "MultiArray"
+      },
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float16",
+        "formattedType" : "MultiArray (Float16 21 × 1 × 160000)",
+        "shortDescription" : "",
+        "shape" : "[21, 1, 160000]",
+        "name" : "sliding_window_waveform",
+        "type" : "MultiArray"
+      }
+    ],
+    "modelParameters" : [
+    ],
+    "specificationVersion" : 7,
+    "mlProgramOperationTypeHistogram" : {
+      "Transpose" : 2,
+      "Ios16.maxPool" : 3,
+      "Ios16.exp" : 1,
+      "Ios16.softmax" : 1,
+      "SlidingWindows" : 1,
+      "Ios16.linear" : 5,
+      "Ios16.add" : 40,
+      "Ios16.realDiv" : 1,
+      "Ios16.reduceMax" : 1,
+      "Ios16.reduceSum" : 2,
+      "Ios16.reduceArgmax" : 1,
+      "Ios16.greater" : 1,
+      "Ios16.log" : 1,
+      "ExpandDims" : 1,
+      "Ios16.instanceNorm" : 4,
+      "Ios16.sub" : 1,
+      "Ios16.cast" : 4,
+      "Ios16.conv" : 3,
+      "Ios16.constexprLutToDense" : 22,
+      "OneHot" : 1,
+      "Ios16.abs" : 1,
+      "Ios16.lstm" : 4,
+      "SliceByIndex" : 61,
+      "Ios16.scatter" : 42,
+      "Ios16.mul" : 1,
+      "Ios16.leakyRelu" : 5
+    },
+    "computePrecision" : "Mixed (Float16, Float32, Int32)",
+    "isUpdatable" : "0",
+    "stateSchema" : [
+    ],
+    "availability" : {
+      "macOS" : "13.0",
+      "tvOS" : "16.0",
+      "visionOS" : "1.0",
+      "watchOS" : "9.0",
+      "iOS" : "16.0",
+      "macCatalyst" : "16.0"
+    },
+    "modelType" : {
+      "name" : "MLModelType_mlProgram"
+    },
+    "userDefinedMetadata" : {
+      "com.github.apple.coremltools.source_dialect" : "TorchScript",
+      "com.github.apple.coremltools.version" : "8.2",
+      "com.github.apple.coremltools.source" : "torch==2.6.0"
+    },
+    "inputSchema" : [
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float16",
+        "formattedType" : "MultiArray (Float16 480000)",
+        "shortDescription" : "",
+        "shape" : "[480000]",
+        "name" : "waveform",
+        "type" : "MultiArray"
+      },
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float16",
+        "formattedType" : "MultiArray (Float16 7)",
+        "shortDescription" : "",
+        "shape" : "[7]",
+        "name" : "input_1",
+        "type" : "MultiArray"
+      }
+    ],
+    "generatedClassName" : "SpeakerSegmenter_8_bit",
+    "method" : "predict"
+  }
+]

speaker_segmenter/pyannote-v3-pro/W8A16/SpeakerSegmenter.mlmodelc/model.mil ADDED Viewed

The diff for this file is too large to render. See raw diff

speaker_segmenter/pyannote-v3-pro/W8A16/SpeakerSegmenter.mlmodelc/weights/weight.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:18aa76153cdb04fc74b8b3ab438d96d0f297aba084f4ebc7ea2b7dcc611c1838
+size 1537882