;;; tanbw
;;; /
;;;
;;; CosyVoice
;;;
;;; ONNX
;;;
;;; Model card Files Files and versions Community
;;;
;;; CosyVoice / CosyVoice-ttsfrd /resource /festival /voices /english /rab_diphone /festvox /rab_diphone.scm
;;;
;;; tanbw
;;;
;;; Add files using upload-large-folder tool
;;;
;;; 92674ed verified about 2 months ago
;;;
;;; raw
;;;
;;; history blame
;;;
;;; 11.1 kB

	;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
	;;; ;;
	;;; Centre for Speech Technology Research ;;
	;;; University of Edinburgh, UK ;;
	;;; Copyright (c) 1996,1997 ;;
	;;; All Rights Reserved. ;;
	;;; ;;
	;;; Permission is hereby granted, free of charge, to use and distribute ;;
	;;; this software and its documentation without restriction, including ;;
	;;; without limitation the rights to use, copy, modify, merge, publish, ;;
	;;; distribute, sublicense, and/or sell copies of this work, and to ;;
	;;; permit persons to whom this work is furnished to do so, subject to ;;
	;;; the following conditions: ;;
	;;; 1. The code must retain the above copyright notice, this list of ;;
	;;; conditions and the following disclaimer. ;;
	;;; 2. Any modifications must be clearly marked as such. ;;
	;;; 3. Original authors' names are not deleted. ;;
	;;; 4. The authors' names are not used to endorse or promote products ;;
	;;; derived from this software without specific prior written ;;
	;;; permission. ;;
	;;; ;;
	;;; THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK ;;
	;;; DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ;;
	;;; ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT ;;
	;;; SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE ;;
	;;; FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES ;;
	;;; WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN ;;
	;;; AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ;;
	;;; ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF ;;
	;;; THIS SOFTWARE. ;;
	;;; ;;
	;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
	;;; Set up rab_diphones using the standard UniSyn diphone synthesizer
	;;;
	;;; Roger diphones: male RP English collected October 1996
	;;;

	(defvar rab_diphone_dir
	  (cdr (assoc 'rab_diphone voice-locations))
	  "rab_diphone_dir
	  Directory of the rab diphone database.  Unless already set, it is
	  taken from the rab_diphone entry of voice-locations.")

	(require 'mrpa_phones)
	(require 'pos)
	(require 'phrase)
	(require 'tobi)
	(require 'f2bf0lr)
	(require 'mrpa_durs)
	(require 'gswdurtreeZ)
	(require_module 'UniSyn)

	;; Lexicon setup; voice_rab_diphone later selects the lexicon named "oald".
	(setup_oald_lex)

	;; Signal processing variant: set this to lpc or psola.
	(defvar rab_sigpr 'lpc)

	;; Set this to ungroup to use the ungrouped version of the database.
	(defvar rab_groupungroup 'group)

	;; Group file used by the grouped LPC database: the 16k file is used when
	;; probe_file finds it, otherwise the 8k file.  As with the defvars
	;; above, an existing binding of rab_index_file is left untouched.
	(defvar rab_index_file
	  (let ((group16k (path-append rab_diphone_dir "group/rablpc16k.group")))
	    (if (probe_file group16k)
	        group16k
	        (path-append rab_diphone_dir "group/rablpc8k.group"))))

	;; Description of the ungrouped (grouped "false") PSOLA database:
	;; index under dic/, .pm coefficient files under pm/ and .wav signal
	;; files under wav/, all relative to rab_diphone_dir.
	(set! rab_psola_sep
	      (let ((dic_file (path-append rab_diphone_dir "dic/diphdic_full.est"))
	            (pm_dir (path-append rab_diphone_dir "pm"))
	            (wav_dir (path-append rab_diphone_dir "wav")))
	        (list
	         (list 'name "rab_psola_sep")
	         (list 'index_file dic_file)
	         (list 'grouped "false")
	         (list 'coef_dir pm_dir)
	         (list 'sig_dir wav_dir)
	         (list 'coef_ext ".pm")
	         (list 'sig_ext ".wav"))))

	;; Description of the ungrouped (grouped "false") LPC database: index
	;; under dic/, with both the .lpc coefficient files and the .res signal
	;; files read from lpc/, all relative to rab_diphone_dir.
	(set! rab_lpc_sep
	      (let ((dic_file (path-append rab_diphone_dir "dic/diphdic_full.est"))
	            (lpc_dir (path-append rab_diphone_dir "lpc")))
	        (list
	         (list 'name "rab_lpc_sep")
	         (list 'index_file dic_file)
	         (list 'grouped "false")
	         (list 'coef_dir lpc_dir)
	         (list 'sig_dir lpc_dir)
	         (list 'coef_ext ".lpc")
	         (list 'sig_ext ".res"))))

	;; Description of the grouped (grouped "true") PSOLA database, read
	;; from the single file group/rab.group under rab_diphone_dir.
	(set! rab_psola_group
	      (let ((group_file (path-append rab_diphone_dir "group/rab.group")))
	        (list
	         (list 'name "rab_psola_group")
	         (list 'index_file group_file)
	         (list 'grouped "true"))))

	;; Description of the grouped (grouped "true") LPC database, read from
	;; rab_index_file.  The alternates tables pair a phone name with another
	;; phone name (presumably a substitute tried when a diphone is missing
	;; -- confirm against the UniSyn documentation); default_diphone names
	;; the diphone @-@@.
	(set! rab_lpc_group
	      (list
	       (list 'name "rab_lpc_group")
	       (list 'index_file rab_index_file)
	       (list 'alternates_left
	             '((i ii) (ll l) (u uu) (i@ ii) (uh @) (a aa)
	               (u@ uu) (w @) (o oo) (e@ ei) (e ei)
	               (r @)))
	       (list 'alternates_right
	             '((i ii) (ll l) (u uu) (i@ ii)
	               (y i) (uh @) (r @) (w @)))
	       (list 'default_diphone '@-@@)
	       (list 'grouped "true")))

	;;; Set up the desired DB.  rab_sigpr chooses between the psola and lpc
	;;; descriptions and rab_groupungroup between the grouped and ungrouped
	;;; ones; the result of us_diphone_init is stored in rab_db_name.  Any
	;;; other combination of the two settings leaves rab_db_name unassigned.
	(cond
	 ((eq rab_sigpr 'psola)
	  (cond
	   ((eq rab_groupungroup 'group)
	    (set! rab_db_name (us_diphone_init rab_psola_group)))
	   ((eq rab_groupungroup 'ungroup)
	    (set! rab_db_name (us_diphone_init rab_psola_sep)))))
	 ((eq rab_sigpr 'lpc)
	  (cond
	   ((eq rab_groupungroup 'group)
	    (set! rab_db_name (us_diphone_init rab_lpc_group)))
	   ((eq rab_groupungroup 'ungroup)
	    (set! rab_db_name (us_diphone_init rab_lpc_sep))))))

	(define (rab_postlex_syllabics utt)
	  "(rab_postlex_syllabics utt)
	  Because the lexicon is somewhat inconsistent in its use of syllabic
	  l, n and m, this post-processes the segments of UTT and inserts a
	  schwa (@) before such a segment.  Ideally the lexicon should be fixed."
	  (mapcar
	   (lambda (seg)
	     ;; A segment qualifies when it is l, n or m, is marked as coda,
	     ;; the previous segment is neither l nor r, and the previous
	     ;; segment's ph_vc feature is "-".
	     (and (member_string (item.name seg) '("l" "n" "m"))
	          (string-equal "coda" (item.feat seg "seg_onsetcoda"))
	          (not (member_string (item.feat seg "p.name") '(l r)))
	          (string-equal "-" (item.feat seg "p.ph_vc"))
	          ;; Create the "@" item before SEG, then place that new item
	          ;; before SEG in the SylStructure relation as well.
	          (let ((schwa (item.insert seg (list "@") 'before)))
	            (item.relation.insert seg 'SylStructure schwa 'before))))
	   (utt.relation.items utt 'Segment)))

	(define (rab_diphone_const_clusters utt)
	  "(rab_diphone_const_clusters UTT)
	  Identify consonant clusters, dark ls etc in the Segment items of UTT,
	  ready for diphone resynthesis, by applying rab_diphone_fix_phone_name
	  to each of them.  Returns UTT.  voice_rab_diphone installs this
	  function on UniSyn_module_hooks."
	  (let ((segments (utt.relation.items utt 'Segment)))
	    (mapcar
	     (lambda (seg) (rab_diphone_fix_phone_name utt seg))
	     segments)
	    utt))

	(define (rab_diphone_fix_phone_name utt seg)
	"(rab_diphone_fix_phone_name UTT SEG)
	Set the features us_diphone_left and/or us_diphone_right on the given
	segment to the appropriate names for constructing a diphone. Basically
	adds _ if either side is part of the same consonant cluster, adds $
	either side if in different syllable for preceding/succeeding vowel
	syllable, converts l to ll in coda part of syllables, and uses t as the
	right-hand name of ch and jh after a vowel. UTT is not used here."
	(let ((name (item.name seg)))
	(cond
	;; Segments named "#" are left untouched.
	((string-equal name "#") t)
	;; Only segments whose ph_vc feature is "-" are processed; any other
	;; segment falls through the cond with no feature set.  The tests
	;; below run in sequence, so a later one may overwrite a
	;; us_diphone_left / us_diphone_right value set by an earlier one.
	((string-equal "-" (item.feat seg 'ph_vc))
	;; r w y l after p t k b d g, with a previous item in SylStructure:
	;; right name becomes "_<name>".
	(if (and (member_string name '(r w y l))
	(member_string (item.feat seg "p.name") '(p t k b d g))
	(item.relation.prev seg "SylStructure"))
	(item.set_feat seg "us_diphone_right" (format nil "_%s" name)))
	;; w y l m n p t k after s, with a previous item in SylStructure:
	;; right name becomes "_<name>".
	(if (and (member_string name '(w y l m n p t k))
	(string-equal (item.feat seg "p.name") 's)
	(item.relation.prev seg "SylStructure"))
	(item.set_feat seg "us_diphone_right" (format nil "_%s" name)))
	;; s before w y l m n p t k, with a next item in SylStructure:
	;; left name becomes "<name>_".
	(if (and (string-equal name 's)
	(member_string (item.feat seg "n.name") '(w y l m n p t k))
	(item.relation.next seg "SylStructure"))
	(item.set_feat seg "us_diphone_left" (format nil "%s_" name)))
	;; p t k b d g before r w y l, with a next item in SylStructure:
	;; left name becomes "<name>_".
	(if (and (member_string name '(p t k b d g))
	(member_string (item.feat seg "n.name") '(r w y l))
	(item.relation.next seg "SylStructure"))
	(item.set_feat seg "us_diphone_left" (format nil "%s_" name)))
	;; p k b d g whose previous segment has ph_vc "+" and is not @ aa o,
	;; with no previous item in SylStructure: right name becomes "$<name>".
	;; NOTE(review): this list omits t and the exclusion list includes o,
	;; unlike the mirrored left-hand test below -- confirm this asymmetry
	;; is intended.
	(if (and (member_string name '(p k b d g))
	(string-equal "+" (item.feat seg 'p.ph_vc))
	(not (member_string (item.feat seg "p.name") '(@ aa o)))
	(not (item.relation.prev seg "SylStructure")))
	(item.set_feat seg "us_diphone_right" (format nil "$%s" name)))
	;; p t k b d g whose next segment has ph_vc "+" and is not @ aa,
	;; with no next item in SylStructure: left name becomes "<name>$".
	(if (and (member_string name '(p t k b d g))
	(string-equal "+" (item.feat seg 'n.ph_vc))
	(not (member_string (item.feat seg "n.name") '(@ aa)))
	(not (item.relation.next seg "SylStructure")))
	(item.set_feat seg "us_diphone_left" (format nil "%s$" name)))
	;; l whose previous segment has ph_vc "+" and a ph_vlng other than "a",
	;; with a previous item in SylStructure: right name becomes "ll".
	(if (and (string-equal "l" name)
	(string-equal "+" (item.feat seg "p.ph_vc"))
	(not (string-equal "a" (item.feat seg "p.ph_vlng")))
	(item.relation.prev seg 'SylStructure))
	(item.set_feat seg "us_diphone_right" "ll"))
	;; ch jh whose previous segment has ph_vc "+": right name becomes "t".
	(if (and (member_string name '(ch jh))
	(string-equal "+" (item.feat seg 'p.ph_vc)))
	(item.set_feat seg "us_diphone_right" "t"))
	)
	)))

	(define (voice_rab_diphone)
	"(voice_rab_diphone)
	Set up the current voice to be a British male RP (Roger) speaker using
	the rab diphone set. Takes no arguments; works by assigning global
	variables and Festival parameters, and finally sets current-voice to
	rab_diphone."
	;; Call voice_reset before installing this voice's own settings
	(voice_reset)
	(Parameter.set 'Language 'britishenglish)
	;; Phone set
	(Parameter.set 'PhoneSet 'mrpa)
	(PhoneSet.select 'mrpa)
	;; Tokenization rules
	(set! token_to_words english_token_to_words)
	;; POS tagger
	(set! pos_lex_name "english_poslex")
	(set! pos_ngram_name 'english_pos_ngram)
	(set! pos_supported t)
	(set! guess_pos english_guess_pos) ;; need this for accents
	;; Lexicon selection
	(lex.select "oald")
	;; Post-lexical rules: the apostrophe-s check, then schwa insertion
	;; before syllabic l/n/m (rab_postlex_syllabics, defined in this file)
	(set! postlex_rules_hooks (list postlex_apos_s_check
	rab_postlex_syllabics))
	;; NOTE(review): postlex_s_check is not referenced anywhere else in this
	;; file -- confirm that something reads it, otherwise this is dead.
	(set! postlex_s_check postlex_apos_s_check)
	;; Phrase prediction
	(Parameter.set 'Phrase_Method 'prob_models)
	(set! phr_break_params english_phr_break_params)
	;; Accent and tone prediction
	(set! int_tone_cart_tree f2b_int_tone_cart_tree)
	(set! int_accent_cart_tree f2b_int_accent_cart_tree)
	;; F0 prediction
	(set! f0_lr_start f2b_f0_lr_start)
	(set! f0_lr_mid f2b_f0_lr_mid)
	(set! f0_lr_end f2b_f0_lr_end)
	(Parameter.set 'Int_Method Intonation_Tree)
	;; Mean/std of the target voice and of the f2b model
	;; (presumably in Hz -- confirm)
	(set! int_lr_params
	'((target_f0_mean 105) (target_f0_std 14)
	(model_f0_mean 170) (model_f0_std 34)))
	(Parameter.set 'Int_Target_Method Int_Targets_LR)
	;; Duration prediction -- use gsw durations
	(set! duration_cart_tree gsw_duration_cart_tree)
	(set! duration_ph_info gsw_durs)
	(Parameter.set 'Duration_Method Duration_Tree_ZScores)
	(Parameter.set 'Duration_Stretch 1.05)
	;; Waveform synthesizer: Roger diphones
	;; This hook assigns the diphone names from their context (_ $ etc)
	(set! UniSyn_module_hooks (list rab_diphone_const_clusters ))
	(set! us_abs_offset 0.0)
	(set! window_factor 1.0)
	(set! us_rel_offset 0.0)
	(set! us_gain 0.9)

	(Parameter.set 'Synth_Method 'UniSyn)
	(Parameter.set 'us_sigpr rab_sigpr)
	;; rab_db_name is assigned when this file is loaded, by the top-level
	;; cond on rab_sigpr / rab_groupungroup; it is unassigned if neither
	;; setting matched one of the supported values.
	(us_db_select rab_db_name)

	(set! current-voice 'rab_diphone)
	)

	;; Announce the voice under the name rab_diphone together with its
	;; language, gender, dialect and a free-text description.
	(proclaim_voice
	'rab_diphone
	'((language english)
	(gender male)
	(dialect british)
	(description
	"This voice provides a British RP English male voice using a
	residual excited LPC diphone synthesis method. It uses a
	modified Oxford Advanced Learners' Dictionary for pronunciations.
	Prosodic phrasing is provided by a statistically trained model
	using part of speech and local distribution of breaks. Intonation
	is provided by a CART tree predicting ToBI accents and an F0
	contour generated from a model trained from natural speech. The
	duration model is also trained from data using a CART tree.")))

	;; Mark the rab_diphone feature as provided, matching the (require ...)
	;; style used for this file's own dependencies.
	(provide 'rab_diphone)