nvidia
/

stt_hy_fastconformer_hybrid_large_pc

@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text

 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+*.nemo filter=lfs diff=lfs merge=lfs -text

README.md CHANGED Viewed

@@ -1,3 +1,150 @@
----
 license: cc-by-4.0
----

 license: cc-by-4.0
+datasets:
+  - mozilla-foundation/common_voice_17_0
+  - google/fleurs
+language:
+  - hy
+pipeline_tag: automatic-speech-recognition
+library_name: nemo
+metrics:
+  - wer
+  - cer
+tags:
+  - speech-recognition
+  - ASR
+  - Armenian
+  - Conformer
+  - Transducer
+  - CTC
+  - NeMo
+  - hf-asr-leaderboard
+  - speech
+  - audio
+model-index:
+  - name: stt_hy_fastconformer_hybrid_large_pc
+    results:
+      - task:
+          name: Automatic Speech Recognition
+          type: automatic-speech-recognition
+        dataset:
+          name: MCV17
+          type: mozilla-foundation/common_voice_17_0
+          split: test
+          args:
+            language: hy
+        metrics:
+          - name: Test WER
+            type: wer
+            value: 9.90
+      - task:
+          name: Automatic Speech Recognition
+          type: automatic-speech-recognition
+        dataset:
+          name: FLEURS
+          type: google/fleurs
+          split: test
+          args:
+            language: hy
+        metrics:
+          - name: Test WER
+            type: wer
+            value: 12.32
+model-details:
+  name: NVIDIA FastConformer-Hybrid Large (hy)
+  description: |
+    This model transcribes Armenian speech with support for capitalization and punctuation marks. It is the "large" version of the FastConformer Transducer-CTC hybrid model, with 115M parameters, trained with both Transducer (default) and CTC losses.
+  license: cc-by-4.0
+  architecture: FastConformer-Hybrid
+  tokenizer:
+    type: SentencePiece
+    vocab_size: 1024
+inputs:
+  type: audio
+  format: wav
+  properties:
+    - 16000 Hz Mono-channel Audio
+    - Pre-Processing Not Needed
+outputs:
+  type: text
+  format: string
+  properties:
+    - Armenian text with punctuation and capitalization
+    - May need inverse text normalization
+    - Does not handle special characters
+limitations:
+  - Non-streaming model
+  - Accuracy depends on input audio characteristics
+  - Not recommended for word-for-word transcription
+  - Limited domain-specific vocabulary
+usage:
+  framework: NeMo
+  pre-trained-model: nvidia/stt_hy_fastconformer_hybrid_large_pc
+  code:
+    - import nemo.collections.asr as nemo_asr
+    - asr_model = nemo_asr.models.EncDecHybridRNNTCTCBPEModel.from_pretrained(model_name="nvidia/stt_hy_fastconformer_hybrid_large_pc")
+    - asr_model.transcribe(['your_audio_file.wav'])
+training:
+  epochs: 200
+  dataset:
+    total_hours: 296.19
+    sources:
+      - Mozilla Common Voice 17.0 (48h)
+      - Google Fleurs (12h)
+      - ArmenianGrqaserAudioBooks (21.96h)
+      - Proprietary Corpus 1 (69.23h)
+      - Proprietary Corpus 2 (145h)
+evaluation:
+  datasets:
+    - Mozilla Common Voice 17.0
+    - Google Fleurs
+    - Proprietary Corpus 1
+  metrics:
+    WER:
+      - MCV Test WER: 9.90
+      - FLEURS Test WER: 12.32
+    CER: Not provided
+deployment:
+  hardware:
+    - NVIDIA Ampere
+    - NVIDIA Blackwell
+    - NVIDIA Jetson
+    - NVIDIA Hopper
+    - NVIDIA Lovelace
+    - NVIDIA Pascal
+    - NVIDIA Turing
+    - NVIDIA Volta
+  runtime: NeMo 2.0.0
+  os: Linux
+ethical-considerations:
+  trustworthy-ai:
+    considerations: Ensure the model meets the requirements of the relevant industry and use case, and that potential misuse is addressed.
+  explainability:
+    application: Automatic Speech Recognition
+    performance:
+      - WER
+      - CER
+      - Real-Time Factor
+    risks:
+      - Accuracy may vary with input characteristics.
+  privacy:
+    compliance: Reviewed for compliance with privacy laws
+    personal-data: No identifiable personal data
+  safety:
+    use-cases: Not applicable for life-critical applications.
+    noise-sensitivity: Sensitive to noise and input variations.

stt_hy_fastconformer_hybrid_large_pc.nemo ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:b75fb9203d48c1a50db3ab6890df0f3d85086a14d9547940adac69c1deaa20eb
+size 459243520