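# Source: stt_en_conformer_ctc_small_v2 / model_config.yaml
# The lines below appear to be web-page residue captured together with the
# file; they are kept as comments so the document can be parsed as YAML:
#   phtran's picture
#   Push model using huggingface_hub.
#   23438b4 verified
#   raw
#   history blame
#   16.6 kB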
# Model-level scalar settings (top-level keys that precede the first section).
sample_rate: 16000
log_prediction: true
ctc_reduction: mean_batch
# Training dataset settings. The child keys are nested under `train_ds` so the
# header no longer parses as null and keys such as `sample_rate`/`batch_size`
# do not collide with same-named keys elsewhere in the file.
train_ds:
  manifest_filepath: /data/NeMo_ASR_SET/English/v2.0/train/tarred_audio_manifest.json
  sample_rate: 16000
  batch_size: 64
  shuffle: true
  num_workers: 8
  pin_memory: true
  use_start_end_token: false
  trim_silence: false
  max_duration: 20.0
  min_duration: 0.1
  shuffle_n: 2048
  is_tarred: true
  # NOTE(review): `_OP_`/`_CL_` presumably stand in for `{`/`}` (brace expansion
  # over shard indices 0..4095) — confirm against the NeMo tarred-dataset docs.
  tarred_audio_filepaths: /data/NeMo_ASR_SET/English/v2.0/train/audio__OP_0..4095_CL_.tar
# Validation dataset settings, nested under `validation_ds` so they form their
# own mapping instead of colliding with same-named keys elsewhere in the file.
validation_ds:
  # NOTE(review): this list contains the test-* manifests as well as the dev-*
  # ones — confirm that evaluating on test data during validation is intended.
  manifest_filepath:
  - /data/ASR/LibriSpeech/librispeech_withsp2/manifests/librivox-dev-other.json
  - /data/ASR/LibriSpeech/librispeech_withsp2/manifests/librivox-dev-clean.json
  - /data/ASR/LibriSpeech/librispeech_withsp2/manifests/librivox-test-other.json
  - /data/ASR/LibriSpeech/librispeech_withsp2/manifests/librivox-test-clean.json
  sample_rate: 16000
  batch_size: 64
  shuffle: false
  num_workers: 8
  pin_memory: true
  use_start_end_token: false
  is_tarred: false
  # `na` is a plain string; presumably a placeholder because `is_tarred` is false.
  tarred_audio_filepaths: na
# Test dataset settings, nested under `test_ds` so they form their own mapping
# instead of colliding with same-named keys elsewhere in the file.
test_ds:
  # NOTE(review): this list contains the dev-* manifests as well as the test-*
  # ones — confirm that this is intended.
  manifest_filepath:
  - /data/ASR/LibriSpeech/librispeech_withsp2/manifests/librivox-test-other.json
  - /data/ASR/LibriSpeech/librispeech_withsp2/manifests/librivox-dev-clean.json
  - /data/ASR/LibriSpeech/librispeech_withsp2/manifests/librivox-dev-other.json
  - /data/ASR/LibriSpeech/librispeech_withsp2/manifests/librivox-test-clean.json
  sample_rate: 16000
  batch_size: 64
  shuffle: false
  num_workers: 8
  pin_memory: true
  use_start_end_token: false
  is_tarred: false
  # `na` is a plain string; presumably a placeholder because `is_tarred` is false.
  tarred_audio_filepaths: na
# Tokenizer settings, nested under `tokenizer` (the header previously parsed as
# null with its children spilled to the top level).
tokenizer:
  dir: /tokenizers/NeMo_ASR_SET/English/asr_set_2.0/tokenizer_spe_unigram_v1024/
  type: bpe
  # NOTE(review): the `nemo:` prefix presumably refers to artifacts packed
  # inside the model archive — confirm before resolving these as file paths.
  model_path: nemo:977d4e24975b431ebb44f2dfcdea8778_tokenizer.model
  vocab_path: nemo:29fcf21b9e6e4fb5b1ba0bb1efd4197e_vocab.txt
  spe_tokenizer_vocab: nemo:cf241f7e4d904eaea46bb96f21dd0b1d_tokenizer.vocab
# Audio preprocessor settings, nested under `preprocessor`. `_target_` names
# the class these arguments belong to.
preprocessor:
  _target_: nemo.collections.asr.modules.AudioToMelSpectrogramPreprocessor
  sample_rate: 16000
  normalize: per_feature
  window_size: 0.025
  window_stride: 0.01
  window: hann
  # 80 features here matches `feat_in: 80` in the encoder section.
  features: 80
  n_fft: 512
  log: true
  frame_splicing: 1
  dither: 1.0e-05
  pad_to: 0
  pad_value: 0.0
# Spectrogram augmentation settings, nested under `spec_augment`.
spec_augment:
  _target_: nemo.collections.asr.modules.SpectrogramAugmentation
  freq_masks: 2
  time_masks: 5
  freq_width: 27
  # NOTE(review): a fractional width (0.05) next to an integer `freq_width`
  # suggests different units for the two — confirm against the module docs.
  time_width: 0.05
# Encoder settings, nested under `encoder`. `_target_` names the class these
# arguments belong to.
encoder:
  _target_: nemo.collections.asr.modules.ConformerEncoder
  feat_in: 80
  feat_out: -1
  n_layers: 16
  # 176 is repeated as the decoder's `feat_in` and the scheduler's `d_model`;
  # keep the three values in sync when changing the model width.
  d_model: 176
  subsampling: striding
  subsampling_factor: 4
  subsampling_conv_channels: 176
  ff_expansion_factor: 4
  self_attention_model: rel_pos
  n_heads: 4
  # Two-element list; the two -1 entries are its items, not separate keys.
  att_context_size:
  - -1
  - -1
  xscaling: true
  untie_biases: true
  pos_emb_max_len: 5000
  conv_kernel_size: 31
  dropout: 0.1
  dropout_emb: 0.0
  dropout_att: 0.1
# Decoder settings, nested under `decoder`. `feat_in` (176) equals the
# encoder's `d_model`, and `num_classes` (1024) equals the number of entries in
# `vocabulary`. Entry order is significant for a vocabulary list — do not sort
# or deduplicate it.
decoder:
  _target_: nemo.collections.asr.modules.ConvASRDecoder
  feat_in: 176
  num_classes: 1024
  vocabulary:
  - <unk>
  - s
  - ▁the
  - ▁a
  - t
  - ▁to
  - ▁and
  - ▁i
  - ▁of
  - ''''
  - ed
  - ▁in
  - d
  - ing
  - 'n'
  - e
  - ▁it
  - ▁that
  - ▁you
  - 'y'
  - er
  - r
  - ▁for
  - m
  - ▁is
  - ▁he
  - re
  - ▁was
  - ▁be
  - p
  - ly
  - ▁so
  - ▁we
  - a
  - g
  - o
  - ▁c
  - b
  - u
  - ▁on
  - ▁have
  - ▁but
  - ll
  - ▁with
  - ▁re
  - or
  - ▁s
  - al
  - ▁do
  - ▁know
  - ar
  - ▁they
  - ▁not
  - ▁as
  - ▁this
  - in
  - le
  - ▁e
  - ▁are
  - ▁like
  - c
  - ▁uh
  - ri
  - ▁me
  - ▁his
  - ▁at
  - l
  - es
  - ▁de
  - ▁yeah
  - ▁can
  - k
  - ▁or
  - ▁my
  - ▁all
  - ▁had
  - ▁there
  - ▁will
  - ▁one
  - il
  - ▁no
  - ▁what
  - en
  - ck
  - ▁b
  - ▁f
  - ce
  - ch
  - i
  - ▁by
  - ▁she
  - ▁from
  - ▁an
  - ic
  - ur
  - ve
  - w
  - ter
  - la
  - ▁if
  - ▁just
  - th
  - li
  # NOTE(review): this entry is empty and parses as null; the original token was
  # probably lost in extraction — restore it from the tokenizer vocab file.
  -
  - ▁her
  - ▁um
  - 'on'
  - ation
  - ▁w
  - ▁would
  - f
  - te
  - ▁st
  - ▁go
  - ir
  - it
  - ▁out
  - ro
  - ▁pa
  - ▁were
  - ▁g
  - ▁t
  - ion
  - ▁think
  - an
  - ▁right
  - ▁about
  - se
  - lo
  - ent
  - ▁up
  - ment
  - ate
  - ▁when
  - h
  - ne
  - ▁don
  - ▁has
  - ▁also
  - ▁more
  - ▁see
  - ▁okay
  - ▁their
  - ▁your
  - ge
  - ▁who
  - ▁well
  - ▁co
  - ▁which
  - ▁some
  - ▁se
  - ▁time
  - ▁ba
  - ▁said
  - ▁con
  - ers
  - ▁ra
  - us
  - de
  - ra
  - ▁him
  - ▁our
  - ▁been
  - ▁fa
  - ▁po
  - ▁pro
  - et
  - x
  - ▁la
  - id
  - ver
  - ▁oh
  - ▁ma
  - v
  - ▁now
  - age
  - ▁two
  - ld
  - ▁mo
  - ▁how
  - tion
  - ▁people
  - ive
  - ▁other
  - ng
  - ity
  - z
  - ist
  - ▁very
  - ▁get
  - ▁any
  - ▁un
  - ▁ro
  - is
  - ▁work
  - ▁mean
  - ▁them
  - ▁lo
  - vi
  - ▁because
  - ies
  - ul
  - as
  - ad
  - mp
  - ▁bo
  - '-'
  - ▁then
  - ▁good
  - el
  - nd
  - ▁li
  - ▁man
  - ▁dis
  - ▁could
  - ▁ho
  - at
  - ol
  - ▁bu
  - ▁te
  - ▁ha
  - est
  - me
  - ▁say
  - ru
  - ke
  - ▁sp
  - ▁k
  - able
  - ▁su
  - ▁sa
  - ▁di
  - ▁fi
  - ance
  - ▁really
  - ▁over
  - ▁even
  - ry
  - ▁us
  - ▁ca
  - ow
  - ho
  - ▁into
  - ence
  - mo
  - ▁mi
  - one
  - qu
  - ut
  - lu
  - ▁o
  - ty
  - ▁after
  - ▁want
  - ▁new
  - ▁take
  - ▁p
  - ▁look
  - ▁pre
  - sh
  - ▁day
  - ▁should
  - ▁th
  - ▁need
  - ▁cha
  - co
  - ▁much
  - ▁where
  - ▁d
  - ant
  - ▁fe
  - ▁da
  - ▁make
  - om
  - ▁did
  - ▁le
  - un
  - ▁only
  - im
  - ▁these
  - ff
  - ti
  - ish
  - ▁ex
  - ted
  - ▁first
  - he
  - ig
  - ▁vi
  - ▁ri
  - ▁en
  - ▁com
  - ated
  - ▁than
  - ma
  - ▁way
  - um
  - ct
  - end
  - ight
  - ▁here
  - ▁ta
  - ▁car
  - ▁part
  - ▁come
  - ia
  - ▁off
  - ▁sc
  - ▁ah
  - am
  - ▁tra
  - ▁yes
  - ▁back
  - ture
  - ful
  - ▁pri
  - ction
  - ine
  - ▁three
  - ard
  - ▁let
  - pe
  - ▁little
  - ▁down
  - mb
  - ▁si
  - ▁dr
  - ▁mr
  - ▁going
  - ▁comp
  - po
  - ▁m
  - ▁sta
  - ▁gra
  - day
  - ▁many
  - ian
  - ta
  - ▁long
  - ▁pi
  - ▁too
  - ▁app
  - ▁kind
  - ous
  - ci
  - ▁ga
  - ten
  - nt
  - ▁before
  - ▁may
  - ▁got
  - man
  - tic
  - ition
  - cu
  - ugh
  - tra
  - ▁n
  - ward
  - ▁give
  - ▁every
  - ▁hi
  - ting
  - ▁exp
  - ▁those
  - ▁hu
  - ot
  - ▁something
  - ▁lot
  - ▁still
  - ▁ne
  - na
  - ise
  - pp
  - ▁most
  - ▁gu
  - ▁state
  - ▁actually
  - ▁such
  - ▁bi
  - ▁never
  - tain
  - ▁great
  - ▁through
  - ▁al
  - 'no'
  - ▁mar
  - ▁year
  - ach
  - les
  - ▁school
  - ally
  - ial
  - ha
  - ▁old
  - ▁made
  - ary
  - ▁ar
  - ▁years
  - ▁help
  - ▁per
  - ving
  - ical
  - ther
  - ▁does
  - ac
  - ca
  - ▁must
  - di
  - ▁own
  - ▁ru
  - ▁things
  - ▁hand
  - ▁thing
  - ▁high
  - ▁last
  - go
  - ▁sh
  - ▁under
  - ▁four
  - ▁place
  - ations
  - ▁sure
  - mi
  - nce
  - ▁am
  - for
  - ness
  - ▁name
  - ▁five
  - ound
  - ▁op
  - ▁cons
  - ▁ph
  - ▁same
  - row
  - ven
  - ph
  - ite
  - ▁pe
  - j
  - ▁sha
  - ▁friend
  - ▁wi
  - ▁call
  - ▁european
  - ▁h
  - ect
  - ress
  - ▁live
  - port
  - ▁mhm
  - ▁house
  - ie
  - ni
  - ▁plan
  - ▁jo
  - ▁play
  - side
  - ▁va
  - min
  - ious
  - ▁life
  - ▁du
  - ▁ti
  - ▁six
  - ▁men
  - ▁again
  - ▁thank
  - ▁talk
  - par
  - ▁home
  - op
  - ▁both
  - ▁why
  - ▁put
  - ▁another
  - nc
  - ▁being
  - mit
  - ▁came
  - led
  - ▁fo
  - ▁end
  - ▁member
  - ative
  - ▁thought
  - ▁tri
  - iv
  - our
  - red
  - ▁went
  - lic
  - ▁find
  - ▁pu
  - land
  - ▁start
  - ▁far
  - ▁eu
  - ▁imp
  - ▁always
  - ▁ju
  - ▁wa
  - ▁person
  - ▁singapore
  - ap
  - ▁show
  - ▁chi
  - ▁ten
  - ▁eight
  - ▁while
  - ▁point
  - ▁y
  - ▁ja
  - ▁ya
  - ling
  - ctor
  - ▁use
  - ▁acc
  - ▁world
  - ▁pay
  - ▁read
  - va
  - vo
  - ▁change
  - ▁u
  - ▁pl
  - ▁sw
  - ▁war
  - ▁might
  - nk
  - ments
  - and
  - ▁different
  - ▁dec
  - cent
  - ▁ste
  - ▁better
  - ▁fun
  - ▁month
  - ship
  - ton
  - ▁tell
  - ▁twenty
  - ▁commission
  - ▁exc
  - ▁miss
  - if
  - ▁love
  - ▁money
  - ▁found
  - ▁hundred
  - gg
  - ▁add
  - ▁real
  - ities
  - ▁na
  - ▁pass
  - ▁didn
  - ▁v
  - ▁feel
  - ▁week
  - ▁win
  - ible
  - ▁try
  - ▁upon
  - ba
  - ▁interest
  - ▁inter
  - son
  - line
  - ▁ob
  - ▁boy
  - ▁big
  - ▁used
  - ▁seven
  - ▁away
  - ▁family
  - less
  - ▁ki
  - ber
  - ▁around
  - ▁turn
  - ▁anything
  - ▁care
  - ▁young
  - ▁guess
  - ▁happen
  - ▁course
  - ▁agree
  - ▁support
  - ▁conf
  - ual
  - ▁number
  - ▁trans
  - ating
  - ▁mister
  - ▁hard
  - ▁watch
  - ft
  - ▁next
  - ▁sea
  - ▁open
  - ▁without
  - duc
  - gra
  - ak
  - ▁cap
  - ▁cre
  - hi
  - ▁government
  - ▁vo
  - ▁between
  - ▁each
  - ▁ve
  - ▁though
  - ▁country
  - ▁few
  - ▁once
  - ▁'
  - ▁head
  - ▁free
  - ▁mu
  - ▁maybe
  - ▁act
  - ▁night
  - ▁thousand
  - ▁face
  - ▁uhhuh
  - ▁keep
  - ▁nine
  - ▁close
  - ▁case
  - ▁che
  - ▁against
  - ▁done
  - ▁ever
  - ▁law
  - ▁believe
  - ▁public
  - ▁room
  - ▁sub
  - ▁order
  - ▁important
  - ient
  - ▁el
  - ▁children
  - ▁second
  - ▁bri
  - ▁business
  - ▁hope
  - ▁move
  - fa
  - ▁however
  - ▁follow
  - ▁able
  - ▁word
  - ▁yet
  - ▁fla
  - ▁stand
  - ize
  - ▁je
  - ▁service
  - ▁nothing
  - ▁report
  - ▁called
  - ▁grow
  - ▁continue
  - ▁issue
  - ▁since
  - ▁book
  - ▁lu
  - ▁qui
  - ▁develop
  - ▁gen
  - ▁certain
  - light
  - ▁cor
  - ▁small
  - ▁took
  - ▁question
  - ▁whole
  - ▁problem
  - ▁side
  - ▁child
  - ▁full
  - ▁best
  - ▁mm
  - ▁probably
  - fi
  - ▁qua
  - ▁sur
  - ▁market
  - ▁left
  - ▁everything
  - ▁during
  - ▁understand
  - ook
  - wa
  - ▁cent
  - ▁water
  - ▁quite
  - ▁leave
  - ▁himself
  - ip
  - ▁near
  - ▁saw
  - ▁together
  - ▁large
  - ▁having
  - ▁already
  - ▁invest
  - ▁pretty
  - ▁direct
  - ▁hour
  - ▁fact
  - way
  - ▁run
  - ▁bra
  - ▁clear
  - ▁fra
  - ▁area
  - ▁union
  - ▁enough
  - ▁consider
  - ▁lead
  - ▁remain
  - ▁president
  - ▁system
  - ▁def
  - ▁stuff
  - ▁food
  - ▁job
  - ▁heard
  - ▁err
  - ▁mind
  - ▁rest
  - ▁speak
  - ▁asked
  - ator
  - ▁half
  - ▁father
  - com
  - ▁less
  - ▁arm
  - ▁human
  - ency
  - ▁matter
  - ▁group
  - ▁girl
  - ▁current
  - ▁main
  - ttle
  - ▁later
  - ▁learn
  - ▁strong
  - ▁sign
  - ▁check
  - ▁light
  - ▁else
  - ▁true
  - ▁term
  - qui
  - ▁minute
  - ▁spec
  - ▁return
  - ▁answer
  - ▁reason
  - ▁count
  - ▁shall
  - ▁communi
  - ▁travel
  - ▁wait
  - ▁provide
  - ▁low
  - ▁mother
  - ▁expect
  - ▁cause
  - ▁line
  - ▁general
  - lf
  - ▁getting
  - ▁parliament
  - ▁bank
  - ▁company
  - ▁stop
  - cause
  - ▁power
  - ▁gi
  - ▁europe
  - ▁moment
  - ▁among
  - ▁walk
  - ▁allow
  - ▁idea
  - ▁office
  - ▁town
  - ▁cannot
  - ▁countries
  - ▁become
  - ▁appear
  - ▁present
  - ▁bring
  - ▁least
  - ▁almost
  - ▁kids
  - ▁remember
  - ▁include
  - ▁short
  - ▁sometimes
  - ▁game
  - ▁level
  - ▁exactly
  - ▁particular
  - ▁social
  - ▁land
  - ▁woman
  - ▁north
  - ▁nice
  - ▁concern
  - ▁sort
  - ▁effect
  - ▁national
  - ▁several
  - ▁safe
  - ▁until
  - ▁further
  - ▁cost
  - ▁wonder
  - ▁whether
  - ▁either
  - ▁future
  - ▁pra
  - ▁council
  - ▁knew
  - ▁common
  - ▁south
  - ▁making
  - ▁morning
  - ▁process
  - ▁situation
  - ▁white
  - ▁result
  - ▁suppose
  - ▁employ
  - ▁political
  - ▁program
  - ▁along
  - ▁women
  - ▁ski
  - ▁court
  - ▁please
  - ▁shi
  - ▁possible
  - ▁protect
  - ▁experience
  - ▁definitely
  - ▁require
  - ▁account
  - ▁myself
  - ▁black
  - ▁example
  - ▁america
  - ▁thirty
  - ▁student
  - ▁view
  - ▁product
  - ▁wife
  - ▁health
  - ▁major
  - ▁difficult
  - ▁death
  - ▁visit
  - ▁across
  - ▁receive
  - ▁voice
  - ▁citizen
  - ▁regard
  - ▁author
  - ▁treat
  - ▁especially
  - ▁local
  - ▁taking
  - ▁information
  - ▁seemed
  - ▁success
  - ability
  - ▁break
  - ▁whatever
  - ▁security
  - ▁address
  - ▁felt
  - ▁fifty
  - ▁million
  - ▁third
  - ▁usually
  - ▁gonna
  - ▁brother
  - ▁began
  - ▁period
  - ▁east
  - ▁economic
  - ▁increase
  - ▁financial
  - ▁respect
  - ▁enjoy
  - ▁christ
  - ▁education
  - ▁brought
  - ▁organ
  - ▁parents
  - ▁policy
  - ▁round
  - ▁became
  - ▁region
  - ▁lady
  - ▁discuss
  - ▁single
  - ▁early
  - ▁couple
  - ▁type
  - ▁itself
  - ▁serve
  - ▁measure
  - ▁husband
  - ified
  - ▁music
  - ▁ground
  - ▁companies
  - ▁street
  - ▁behind
  - ▁value
  - ▁therefore
  - ▁police
  - ▁complete
  - ▁john
  - ▁daughter
  - ▁affect
  - ▁perhaps
  - ▁international
  - ▁themselves
  - ▁improve
  - ▁condition
  - ▁hotel
  - ▁deliver
  - ▁sense
  - ▁relation
  - ▁sorry
  - ▁credit
  - ▁effort
  - ▁instead
  - ▁york
  - ▁united
  - ▁partner
  - ▁spoke
  - ▁strange
  - ▁everybody
  - ▁horse
  - ▁depend
  - ▁subject
  - ▁project
  - ▁approach
  - ▁involve
  - ▁listen
  - ▁draw
  - ▁computer
  - ▁married
  - ▁record
  - ▁happy
  - ▁sudden
  - ▁represent
  - ▁somebody
  - ▁correct
  - ▁serious
  - ▁decision
  - ▁society
  - ▁including
  - ▁college
  - ▁english
  - ▁attack
  - ▁perform
  - ▁cross
  - ▁accept
  - ▁control
  - ▁flow
  - ▁although
  - ▁drink
  - ▁front
  - ▁wrong
  - ▁twi
  - ▁according
  - ▁slow
  - ▁peace
  - ▁amount
  - ▁object
  - ▁movie
  - ▁benefit
  - ▁yup
  - ▁challenge
  - ▁private
  - ▁church
  - ▁wood
  - ▁field
  - ▁above
  - ▁ensure
  - ▁immediate
  - ▁figure
  - ▁foreign
  - ▁available
  - ▁insurance
  - ▁proposal
  - ▁doubt
  - ▁strength
  - ▁difference
  - ▁stood
  - ▁implement
  - ▁economy
  - ▁detail
  - ▁umhum
  - ▁restaurant
  - ▁collect
  - ▁global
  - ▁broke
  - q
# Optimizer settings, nested under `optim`; the scheduler settings form a
# sub-mapping under `optim.sched` so that its `name` does not overwrite the
# optimizer's `name`.
optim:
  name: adamw
  lr: 2.0
  betas:
  - 0.9
  - 0.98
  weight_decay: 0
  sched:
    name: NoamAnnealing
    # Same value as the encoder's `d_model` (176).
    d_model: 176
    warmup_steps: 10000
    warmup_ratio: null
    min_lr: 1.0e-06
# Top-level metadata: `target` holds a dotted Python path (presumably the model
# class this config belongs to — confirm); `nemo_version` is a version string.
target: nemo.collections.asr.models.ctc_bpe_models.EncDecCTCModelBPE
nemo_version: 2.0.0
# Decoding settings. Nesting is reconstructed from the key grouping: repeated
# keys (`preserve_alignments`, `compute_timestamps`, `beam_size`, `name`,
# `temperature`, ...) can only coexist if each group lives in its own mapping.
# NOTE(review): verify the reconstructed hierarchy against the upstream config.
decoding:
  strategy: greedy_batch
  preserve_alignments: null
  compute_timestamps: null
  # The key spelling `word_seperator` is kept as-is; it is read by name.
  word_seperator: ' '
  ctc_timestamp_type: all
  batch_dim_index: 0
  greedy:
    preserve_alignments: false
    compute_timestamps: false
    preserve_frame_confidence: false
    confidence_method_cfg:
      name: entropy
      entropy_type: tsallis
      alpha: 0.33
      entropy_norm: exp
      temperature: DEPRECATED
  beam:
    beam_size: 4
    search_type: default
    preserve_alignments: false
    compute_timestamps: false
    return_best_hypothesis: true
    beam_alpha: 1.0
    beam_beta: 0.0
    kenlm_path: null
    flashlight_cfg:
      lexicon_path: null
      boost_path: null
      beam_size_token: 16
      beam_threshold: 20.0
      # `-.inf` is YAML's negative-infinity float, not a string.
      unk_weight: -.inf
      sil_weight: 0.0
    pyctcdecode_cfg:
      beam_prune_logp: -10.0
      token_min_logp: -5.0
      prune_history: false
      hotwords: null
      hotword_weight: 10.0
  wfst:
    beam_size: 4
    search_type: riva
    return_best_hypothesis: true
    preserve_alignments: false
    compute_timestamps: false
    decoding_mode: nbest
    open_vocabulary_decoding: false
    beam_width: 10.0
    lm_weight: 1.0
    device: cuda
    arpa_lm_path: null
    wfst_lm_path: null
    riva_decoding_cfg: {}
    k2_decoding_cfg:
      search_beam: 20.0
      output_beam: 10.0
      min_active_states: 30
      max_active_states: 10000
  confidence_cfg:
    preserve_frame_confidence: false
    preserve_token_confidence: false
    preserve_word_confidence: false
    exclude_blank: true
    aggregation: min
    tdt_include_duration: false
    method_cfg:
      name: entropy
      entropy_type: tsallis
      alpha: 0.33
      entropy_norm: exp
      temperature: DEPRECATED
  # Placed directly under `decoding`: it cannot share a mapping with
  # `method_cfg.temperature` above. NOTE(review): confirm this level upstream.
  temperature: 1.0