tanbw commited on
Commit
dbe1964
1 Parent(s): 598a15d

Add files using upload-large-folder tool

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. CosyVoice-300M-Instruct/.msc +0 -0
  2. CosyVoice-300M-Instruct/.mv +1 -0
  3. CosyVoice-300M-Instruct/README.md +150 -0
  4. CosyVoice-300M-Instruct/configuration.json +1 -0
  5. CosyVoice-300M-Instruct/cosyvoice.yaml +197 -0
  6. CosyVoice-300M-SFT/.msc +0 -0
  7. CosyVoice-300M-SFT/.mv +1 -0
  8. CosyVoice-300M-SFT/README.md +150 -0
  9. CosyVoice-300M-SFT/configuration.json +1 -0
  10. CosyVoice-300M-SFT/cosyvoice.yaml +197 -0
  11. CosyVoice-300M/.msc +0 -0
  12. CosyVoice-300M/.mv +1 -0
  13. CosyVoice-300M/README.md +150 -0
  14. CosyVoice-300M/configuration.json +1 -0
  15. CosyVoice-300M/cosyvoice.yaml +197 -0
  16. CosyVoice-ttsfrd/.gitattributes +38 -0
  17. CosyVoice-ttsfrd/README.md +150 -0
  18. CosyVoice-ttsfrd/configuration.json +1 -0
  19. CosyVoice-ttsfrd/resource/festival/Singing.v0_1.dtd +34 -0
  20. CosyVoice-ttsfrd/resource/festival/apml.scm +551 -0
  21. CosyVoice-ttsfrd/resource/festival/apml_f2bf0lr.scm +530 -0
  22. CosyVoice-ttsfrd/resource/festival/apml_kaldurtreeZ.scm +996 -0
  23. CosyVoice-ttsfrd/resource/festival/cart_aux.scm +200 -0
  24. CosyVoice-ttsfrd/resource/festival/clunits.scm +287 -0
  25. CosyVoice-ttsfrd/resource/festival/clunits_build.scm +479 -0
  26. CosyVoice-ttsfrd/resource/festival/cmusphinx2_phones.scm +119 -0
  27. CosyVoice-ttsfrd/resource/festival/cslush.scm +79 -0
  28. CosyVoice-ttsfrd/resource/festival/cstr.scm +121 -0
  29. CosyVoice-ttsfrd/resource/festival/darpa_phones.scm +115 -0
  30. CosyVoice-ttsfrd/resource/festival/display.scm +69 -0
  31. CosyVoice-ttsfrd/resource/festival/duration.scm +196 -0
  32. CosyVoice-ttsfrd/resource/festival/email-mode.scm +89 -0
  33. CosyVoice-ttsfrd/resource/festival/engmorph.scm +151 -0
  34. CosyVoice-ttsfrd/resource/festival/engmorphsyn.scm +170 -0
  35. CosyVoice-ttsfrd/resource/festival/f2bdurtreeZ.scm +869 -0
  36. CosyVoice-ttsfrd/resource/festival/f2bf0lr.scm +314 -0
  37. CosyVoice-ttsfrd/resource/festival/festdoc.scm +178 -0
  38. CosyVoice-ttsfrd/resource/festival/festival.el +282 -0
  39. CosyVoice-ttsfrd/resource/festival/festival.scm +633 -0
  40. CosyVoice-ttsfrd/resource/festival/festtest.scm +72 -0
  41. CosyVoice-ttsfrd/resource/festival/fringe.scm +108 -0
  42. CosyVoice-ttsfrd/resource/festival/gswdurtreeZ.scm +947 -0
  43. CosyVoice-ttsfrd/resource/festival/holmes_phones.scm +118 -0
  44. CosyVoice-ttsfrd/resource/festival/hts.scm +522 -0
  45. CosyVoice-ttsfrd/resource/festival/init.scm +157 -0
  46. CosyVoice-ttsfrd/resource/festival/intonation.scm +187 -0
  47. CosyVoice-ttsfrd/resource/festival/java.scm +39 -0
  48. CosyVoice-ttsfrd/resource/festival/klatt_durs.scm +85 -0
  49. CosyVoice-ttsfrd/resource/festival/languages.scm +122 -0
  50. CosyVoice-ttsfrd/resource/festival/lexicons.scm +274 -0
CosyVoice-300M-Instruct/.msc ADDED
Binary file (625 Bytes). View file
 
CosyVoice-300M-Instruct/.mv ADDED
@@ -0,0 +1 @@
 
 
1
+ Revision:master,CreatedAt:1720198244
CosyVoice-300M-Instruct/README.md ADDED
@@ -0,0 +1,150 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # CosyVoice
2
+ ## 👉🏻 [CosyVoice Demos](https://fun-audio-llm.github.io/) 👈🏻
3
+ [[CosyVoice Paper](https://fun-audio-llm.github.io/pdf/CosyVoice_v1.pdf)][[CosyVoice Studio](https://www.modelscope.cn/studios/iic/CosyVoice-300M)][[CosyVoice Code](https://github.com/FunAudioLLM/CosyVoice)]
4
+
5
+ For `SenseVoice`, visit [SenseVoice repo](https://github.com/FunAudioLLM/SenseVoice) and [SenseVoice space](https://www.modelscope.cn/studios/iic/SenseVoice).
6
+
7
+ ## Install
8
+
9
+ **Clone and install**
10
+
11
+ - Clone the repo
12
+ ``` sh
13
+ git clone --recursive https://github.com/FunAudioLLM/CosyVoice.git
14
+ # If cloning the submodules fails due to network errors, rerun the following commands until they succeed
15
+ cd CosyVoice
16
+ git submodule update --init --recursive
17
+ ```
18
+
19
+ - Install Conda: please see https://docs.conda.io/en/latest/miniconda.html
20
+ - Create Conda env:
21
+
22
+ ``` sh
23
+ conda create -n cosyvoice python=3.8
24
+ conda activate cosyvoice
25
+ pip install -r requirements.txt -i https://mirrors.aliyun.com/pypi/simple/ --trusted-host=mirrors.aliyun.com
26
+
27
+ # If you encounter sox compatibility issues
28
+ # ubuntu
29
+ sudo apt-get install sox libsox-dev
30
+ # centos
31
+ sudo yum install sox sox-devel
32
+ ```
33
+
34
+ **Model download**
35
+
36
+ We strongly recommend that you download our pretrained `CosyVoice-300M`, `CosyVoice-300M-SFT`, and `CosyVoice-300M-Instruct` models and the `speech_kantts_ttsfrd` resource.
37
+
38
+ If you are an expert in this field and are only interested in training your own CosyVoice model from scratch, you can skip this step.
39
+
40
+ ``` python
41
+ # Download the models with the ModelScope SDK
42
+ from modelscope import snapshot_download
43
+ snapshot_download('iic/CosyVoice-300M', local_dir='pretrained_models/CosyVoice-300M')
44
+ snapshot_download('iic/CosyVoice-300M-SFT', local_dir='pretrained_models/CosyVoice-300M-SFT')
45
+ snapshot_download('iic/CosyVoice-300M-Instruct', local_dir='pretrained_models/CosyVoice-300M-Instruct')
46
+ snapshot_download('iic/speech_kantts_ttsfrd', local_dir='pretrained_models/speech_kantts_ttsfrd')
47
+ ```
48
+
49
+ ``` sh
50
+ # Download the models with git; make sure git lfs is installed first
51
+ mkdir -p pretrained_models
52
+ git clone https://www.modelscope.cn/iic/CosyVoice-300M.git pretrained_models/CosyVoice-300M
53
+ git clone https://www.modelscope.cn/iic/CosyVoice-300M-SFT.git pretrained_models/CosyVoice-300M-SFT
54
+ git clone https://www.modelscope.cn/iic/CosyVoice-300M-Instruct.git pretrained_models/CosyVoice-300M-Instruct
55
+ git clone https://www.modelscope.cn/iic/speech_kantts_ttsfrd.git pretrained_models/speech_kantts_ttsfrd
56
+ ```
57
+
58
+ Unzip the `ttsfrd` resource and install the `ttsfrd` package:
59
+ ``` sh
60
+ cd pretrained_models/speech_kantts_ttsfrd/
61
+ unzip resource.zip -d .
62
+ pip install ttsfrd-0.3.6-cp38-cp38-linux_x86_64.whl
63
+ ```
64
+
65
+ **Basic Usage**
66
+
67
+ For zero_shot/cross_lingual inference, please use `CosyVoice-300M` model.
68
+ For sft inference, please use `CosyVoice-300M-SFT` model.
69
+ For instruct inference, please use `CosyVoice-300M-Instruct` model.
70
+ First, add `third_party/AcademiCodec` and `third_party/Matcha-TTS` to your `PYTHONPATH`.
71
+
72
+ ``` sh
73
+ export PYTHONPATH=third_party/AcademiCodec:third_party/Matcha-TTS
74
+ ```
75
+
76
+ ``` python
77
+ from cosyvoice.cli.cosyvoice import CosyVoice
78
+ from cosyvoice.utils.file_utils import load_wav
79
+ import torchaudio
80
+
81
+ cosyvoice = CosyVoice('speech_tts/CosyVoice-300M-SFT')
82
+ # sft usage
83
+ print(cosyvoice.list_avaliable_spks())
84
+ output = cosyvoice.inference_sft('你好,我是通义生成式语音大模型,请问有什么可以帮您的吗?', '中文女')
85
+ torchaudio.save('sft.wav', output['tts_speech'], 22050)
86
+
87
+ cosyvoice = CosyVoice('speech_tts/CosyVoice-300M')
88
+ # zero_shot usage
89
+ prompt_speech_16k = load_wav('zero_shot_prompt.wav', 16000)
90
+ output = cosyvoice.inference_zero_shot('收到好友从远方寄来的生日礼物,那份意外的惊喜与深深的祝福让我心中充满了甜蜜的快乐,笑容如花儿般绽放。', '希望你以后能够做的比我还好呦。', prompt_speech_16k)
91
+ torchaudio.save('zero_shot.wav', output['tts_speech'], 22050)
92
+ # cross_lingual usage
93
+ prompt_speech_16k = load_wav('cross_lingual_prompt.wav', 16000)
94
+ output = cosyvoice.inference_cross_lingual('<|en|>And then later on, fully acquiring that company. So keeping management in line, interest in line with the asset that\'s coming into the family is a reason why sometimes we don\'t buy the whole thing.', prompt_speech_16k)
95
+ torchaudio.save('cross_lingual.wav', output['tts_speech'], 22050)
96
+
97
+ cosyvoice = CosyVoice('speech_tts/CosyVoice-300M-Instruct')
98
+ # instruct usage
99
+ output = cosyvoice.inference_instruct('在面对挑战时,他展现了非凡的<strong>勇气</strong>与<strong>智慧</strong>。', '中文男', 'Theo \'Crimson\', is a fiery, passionate rebel leader. Fights with fervor for justice, but struggles with impulsiveness.')
100
+ torchaudio.save('instruct.wav', output['tts_speech'], 22050)
101
+ ```
102
+
103
+ **Start web demo**
104
+
105
+ You can use our web demo page to get familiar with CosyVoice quickly.
106
+ We support sft/zero_shot/cross_lingual/instruct inference in web demo.
107
+
108
+ Please see the demo website for details.
109
+
110
+ ``` sh
111
+ # change to speech_tts/CosyVoice-300M-SFT for sft inference, or to speech_tts/CosyVoice-300M-Instruct for instruct inference
112
+ python3 webui.py --port 50000 --model_dir speech_tts/CosyVoice-300M
113
+ ```
114
+
115
+ **Advanced Usage**
116
+
117
+ For advanced users, we provide training and inference scripts in `examples/libritts/cosyvoice/run.sh`.
118
+ You can get familiar with CosyVoice by following this recipe.
119
+
120
+ **Build for deployment**
121
+
122
+ Optionally, if you want to use grpc for service deployment,
123
+ you can run the following steps. Otherwise, you can just ignore this step.
124
+
125
+ ``` sh
126
+ cd runtime/python
127
+ docker build -t cosyvoice:v1.0 .
128
+ # change speech_tts/CosyVoice-300M to speech_tts/CosyVoice-300M-Instruct if you want to use instruct inference
129
+ docker run -d --runtime=nvidia -p 50000:50000 cosyvoice:v1.0 /bin/bash -c "cd /opt/CosyVoice/CosyVoice/runtime/python && python3 server.py --port 50000 --max_conc 4 --model_dir speech_tts/CosyVoice-300M && sleep infinity"
130
+ python3 client.py --port 50000 --mode <sft|zero_shot|cross_lingual|instruct>
131
+ ```
132
+
133
+ ## Discussion & Communication
134
+
135
+ You can discuss directly on [GitHub Issues](https://github.com/FunAudioLLM/CosyVoice/issues).
136
+
137
+ You can also scan the QR code to join our official Dingding chat group.
138
+
139
+ <img src="./asset/dingding.png" width="250px">
140
+
141
+ ## Acknowledge
142
+
143
+ 1. We borrowed a lot of code from [FunASR](https://github.com/modelscope/FunASR).
144
+ 2. We borrowed a lot of code from [FunCodec](https://github.com/modelscope/FunCodec).
145
+ 3. We borrowed a lot of code from [Matcha-TTS](https://github.com/shivammehta25/Matcha-TTS).
146
+ 4. We borrowed a lot of code from [AcademiCodec](https://github.com/yangdongchao/AcademiCodec).
147
+ 5. We borrowed a lot of code from [WeNet](https://github.com/wenet-e2e/wenet).
148
+
149
+ ## Disclaimer
150
+ The content provided above is for academic purposes only and is intended to demonstrate technical capabilities. Some examples are sourced from the internet. If any content infringes on your rights, please contact us to request its removal.
CosyVoice-300M-Instruct/configuration.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"framework":"Pytorch","task":"text-to-speech"}
CosyVoice-300M-Instruct/cosyvoice.yaml ADDED
@@ -0,0 +1,197 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # set random seed, so that you may reproduce your result.
2
+ __set_seed1: !apply:random.seed [1986]
3
+ __set_seed2: !apply:numpy.random.seed [1986]
4
+ __set_seed3: !apply:torch.manual_seed [1986]
5
+ __set_seed4: !apply:torch.cuda.manual_seed_all [1986]
6
+
7
+ # fixed params
8
+ sample_rate: 22050
9
+ text_encoder_input_size: 512
10
+ llm_input_size: 1024
11
+ llm_output_size: 1024
12
+ spk_embed_dim: 192
13
+
14
+ # model params
15
+ # for all classes/functions included in this repo, we use !<name> or !<new> for initialization, so that users can find every corresponding class/function from one single yaml.
16
+ # for system/third_party class/function, we do not require this.
17
+ llm: !new:cosyvoice.llm.llm.TransformerLM
18
+ text_encoder_input_size: !ref <text_encoder_input_size>
19
+ llm_input_size: !ref <llm_input_size>
20
+ llm_output_size: !ref <llm_output_size>
21
+ text_token_size: 51866
22
+ speech_token_size: 4096
23
+ length_normalized_loss: True
24
+ lsm_weight: 0
25
+ spk_embed_dim: !ref <spk_embed_dim>
26
+ text_encoder: !new:cosyvoice.transformer.encoder.ConformerEncoder
27
+ input_size: !ref <text_encoder_input_size>
28
+ output_size: 1024
29
+ attention_heads: 16
30
+ linear_units: 4096
31
+ num_blocks: 6
32
+ dropout_rate: 0.1
33
+ positional_dropout_rate: 0.1
34
+ attention_dropout_rate: 0
35
+ normalize_before: True
36
+ input_layer: 'linear'
37
+ pos_enc_layer_type: 'rel_pos_espnet'
38
+ selfattention_layer_type: 'rel_selfattn'
39
+ use_cnn_module: False
40
+ macaron_style: False
41
+ use_dynamic_chunk: False
42
+ use_dynamic_left_chunk: False
43
+ static_chunk_size: 1
44
+ llm: !new:cosyvoice.transformer.encoder.TransformerEncoder
45
+ input_size: !ref <llm_input_size>
46
+ output_size: !ref <llm_output_size>
47
+ attention_heads: 16
48
+ linear_units: 4096
49
+ num_blocks: 14
50
+ dropout_rate: 0.1
51
+ positional_dropout_rate: 0.1
52
+ attention_dropout_rate: 0
53
+ input_layer: 'linear_legacy'
54
+ pos_enc_layer_type: 'rel_pos_espnet'
55
+ selfattention_layer_type: 'rel_selfattn'
56
+ static_chunk_size: 1
57
+
58
+ flow: !new:cosyvoice.flow.flow.MaskedDiffWithXvec
59
+ input_size: 512
60
+ output_size: 80
61
+ spk_embed_dim: !ref <spk_embed_dim>
62
+ output_type: 'mel'
63
+ vocab_size: 4096
64
+ input_frame_rate: 50
65
+ only_mask_loss: True
66
+ encoder: !new:cosyvoice.transformer.encoder.ConformerEncoder
67
+ output_size: 512
68
+ attention_heads: 8
69
+ linear_units: 2048
70
+ num_blocks: 6
71
+ dropout_rate: 0.1
72
+ positional_dropout_rate: 0.1
73
+ attention_dropout_rate: 0.1
74
+ normalize_before: True
75
+ input_layer: 'linear'
76
+ pos_enc_layer_type: 'rel_pos_espnet'
77
+ selfattention_layer_type: 'rel_selfattn'
78
+ input_size: 512
79
+ use_cnn_module: False
80
+ macaron_style: False
81
+ length_regulator: !new:cosyvoice.flow.length_regulator.InterpolateRegulator
82
+ channels: 80
83
+ sampling_ratios: [1, 1, 1, 1]
84
+ decoder: !new:cosyvoice.flow.flow_matching.ConditionalCFM
85
+ in_channels: 240
86
+ n_spks: 1
87
+ spk_emb_dim: 80
88
+ cfm_params: !new:omegaconf.DictConfig
89
+ content:
90
+ sigma_min: 1e-06
91
+ solver: 'euler'
92
+ t_scheduler: 'cosine'
93
+ training_cfg_rate: 0.2
94
+ inference_cfg_rate: 0.7
95
+ reg_loss_type: 'l1'
96
+ estimator: !new:cosyvoice.flow.decoder.ConditionalDecoder
97
+ in_channels: 320
98
+ out_channels: 80
99
+ channels: [256, 256]
100
+ dropout: 0
101
+ attention_head_dim: 64
102
+ n_blocks: 4
103
+ num_mid_blocks: 12
104
+ num_heads: 8
105
+ act_fn: 'gelu'
106
+
107
+ hift: !new:cosyvoice.hifigan.generator.HiFTGenerator
108
+ in_channels: 80
109
+ base_channels: 512
110
+ nb_harmonics: 8
111
+ sampling_rate: !ref <sample_rate>
112
+ nsf_alpha: 0.1
113
+ nsf_sigma: 0.003
114
+ nsf_voiced_threshold: 10
115
+ upsample_rates: [8, 8]
116
+ upsample_kernel_sizes: [16, 16]
117
+ istft_params:
118
+ n_fft: 16
119
+ hop_len: 4
120
+ resblock_kernel_sizes: [3, 7, 11]
121
+ resblock_dilation_sizes: [[1, 3, 5], [1, 3, 5], [1, 3, 5]]
122
+ source_resblock_kernel_sizes: [7, 11]
123
+ source_resblock_dilation_sizes: [[1, 3, 5], [1, 3, 5]]
124
+ lrelu_slope: 0.1
125
+ audio_limit: 0.99
126
+ f0_predictor: !new:cosyvoice.hifigan.f0_predictor.ConvRNNF0Predictor
127
+ num_class: 1
128
+ in_channels: 80
129
+ cond_channels: 512
130
+
131
+ # processor functions
132
+ parquet_opener: !name:cosyvoice.dataset.processor.parquet_opener
133
+ get_tokenizer: !name:whisper.tokenizer.get_tokenizer
134
+ multilingual: True
135
+ num_languages: 100
136
+ language: 'en'
137
+ task: 'transcribe'
138
+ allowed_special: 'all'
139
+ tokenize: !name:cosyvoice.dataset.processor.tokenize
140
+ get_tokenizer: !ref <get_tokenizer>
141
+ allowed_special: !ref <allowed_special>
142
+ filter: !name:cosyvoice.dataset.processor.filter
143
+ max_length: 40960
144
+ min_length: 0
145
+ token_max_length: 200
146
+ token_min_length: 1
147
+ resample: !name:cosyvoice.dataset.processor.resample
148
+ resample_rate: !ref <sample_rate>
149
+ feat_extractor: !name:matcha.utils.audio.mel_spectrogram
150
+ n_fft: 1024
151
+ num_mels: 80
152
+ sampling_rate: !ref <sample_rate>
153
+ hop_size: 256
154
+ win_size: 1024
155
+ fmin: 0
156
+ fmax: 8000
157
+ center: False
158
+ compute_fbank: !name:cosyvoice.dataset.processor.compute_fbank
159
+ feat_extractor: !ref <feat_extractor>
160
+ parse_embedding: !name:cosyvoice.dataset.processor.parse_embedding
161
+ normalize: True
162
+ shuffle: !name:cosyvoice.dataset.processor.shuffle
163
+ shuffle_size: 1000
164
+ sort: !name:cosyvoice.dataset.processor.sort
165
+ sort_size: 500 # sort_size should be less than shuffle_size
166
+ batch: !name:cosyvoice.dataset.processor.batch
167
+ batch_type: 'dynamic'
168
+ max_frames_in_batch: 2000
169
+ padding: !name:cosyvoice.dataset.processor.padding
170
+
171
+ # dataset processor pipeline
172
+ data_pipeline: [
173
+ !ref <parquet_opener>,
174
+ !ref <tokenize>,
175
+ !ref <filter>,
176
+ !ref <resample>,
177
+ !ref <compute_fbank>,
178
+ !ref <parse_embedding>,
179
+ !ref <shuffle>,
180
+ !ref <sort>,
181
+ !ref <batch>,
182
+ !ref <padding>,
183
+ ]
184
+
185
+ # train conf
186
+ train_conf:
187
+ optim: adam
188
+ optim_conf:
189
+ lr: 0.001
190
+ scheduler: warmuplr
191
+ scheduler_conf:
192
+ warmup_steps: 2500
193
+ max_epoch: 200
194
+ grad_clip: 5
195
+ accum_grad: 2
196
+ log_interval: 100
197
+ save_per_step: -1
CosyVoice-300M-SFT/.msc ADDED
Binary file (625 Bytes). View file
 
CosyVoice-300M-SFT/.mv ADDED
@@ -0,0 +1 @@
 
 
1
+ Revision:master,CreatedAt:1720196168
CosyVoice-300M-SFT/README.md ADDED
@@ -0,0 +1,150 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # CosyVoice
2
+ ## 👉🏻 [CosyVoice Demos](https://fun-audio-llm.github.io/) 👈🏻
3
+ [[CosyVoice Paper](https://fun-audio-llm.github.io/pdf/CosyVoice_v1.pdf)][[CosyVoice Studio](https://www.modelscope.cn/studios/iic/CosyVoice-300M)][[CosyVoice Code](https://github.com/FunAudioLLM/CosyVoice)]
4
+
5
+ For `SenseVoice`, visit [SenseVoice repo](https://github.com/FunAudioLLM/SenseVoice) and [SenseVoice space](https://www.modelscope.cn/studios/iic/SenseVoice).
6
+
7
+ ## Install
8
+
9
+ **Clone and install**
10
+
11
+ - Clone the repo
12
+ ``` sh
13
+ git clone --recursive https://github.com/FunAudioLLM/CosyVoice.git
14
+ # If cloning the submodules fails due to network errors, rerun the following commands until they succeed
15
+ cd CosyVoice
16
+ git submodule update --init --recursive
17
+ ```
18
+
19
+ - Install Conda: please see https://docs.conda.io/en/latest/miniconda.html
20
+ - Create Conda env:
21
+
22
+ ``` sh
23
+ conda create -n cosyvoice python=3.8
24
+ conda activate cosyvoice
25
+ pip install -r requirements.txt -i https://mirrors.aliyun.com/pypi/simple/ --trusted-host=mirrors.aliyun.com
26
+
27
+ # If you encounter sox compatibility issues
28
+ # ubuntu
29
+ sudo apt-get install sox libsox-dev
30
+ # centos
31
+ sudo yum install sox sox-devel
32
+ ```
33
+
34
+ **Model download**
35
+
36
+ We strongly recommend that you download our pretrained `CosyVoice-300M`, `CosyVoice-300M-SFT`, and `CosyVoice-300M-Instruct` models and the `speech_kantts_ttsfrd` resource.
37
+
38
+ If you are an expert in this field and are only interested in training your own CosyVoice model from scratch, you can skip this step.
39
+
40
+ ``` python
41
+ # Download the models with the ModelScope SDK
42
+ from modelscope import snapshot_download
43
+ snapshot_download('iic/CosyVoice-300M', local_dir='pretrained_models/CosyVoice-300M')
44
+ snapshot_download('iic/CosyVoice-300M-SFT', local_dir='pretrained_models/CosyVoice-300M-SFT')
45
+ snapshot_download('iic/CosyVoice-300M-Instruct', local_dir='pretrained_models/CosyVoice-300M-Instruct')
46
+ snapshot_download('iic/speech_kantts_ttsfrd', local_dir='pretrained_models/speech_kantts_ttsfrd')
47
+ ```
48
+
49
+ ``` sh
50
+ # Download the models with git; make sure git lfs is installed first
51
+ mkdir -p pretrained_models
52
+ git clone https://www.modelscope.cn/iic/CosyVoice-300M.git pretrained_models/CosyVoice-300M
53
+ git clone https://www.modelscope.cn/iic/CosyVoice-300M-SFT.git pretrained_models/CosyVoice-300M-SFT
54
+ git clone https://www.modelscope.cn/iic/CosyVoice-300M-Instruct.git pretrained_models/CosyVoice-300M-Instruct
55
+ git clone https://www.modelscope.cn/iic/speech_kantts_ttsfrd.git pretrained_models/speech_kantts_ttsfrd
56
+ ```
57
+
58
+ Unzip the `ttsfrd` resource and install the `ttsfrd` package:
59
+ ``` sh
60
+ cd pretrained_models/speech_kantts_ttsfrd/
61
+ unzip resource.zip -d .
62
+ pip install ttsfrd-0.3.6-cp38-cp38-linux_x86_64.whl
63
+ ```
64
+
65
+ **Basic Usage**
66
+
67
+ For zero_shot/cross_lingual inference, please use `CosyVoice-300M` model.
68
+ For sft inference, please use `CosyVoice-300M-SFT` model.
69
+ For instruct inference, please use `CosyVoice-300M-Instruct` model.
70
+ First, add `third_party/AcademiCodec` and `third_party/Matcha-TTS` to your `PYTHONPATH`.
71
+
72
+ ``` sh
73
+ export PYTHONPATH=third_party/AcademiCodec:third_party/Matcha-TTS
74
+ ```
75
+
76
+ ``` python
77
+ from cosyvoice.cli.cosyvoice import CosyVoice
78
+ from cosyvoice.utils.file_utils import load_wav
79
+ import torchaudio
80
+
81
+ cosyvoice = CosyVoice('speech_tts/CosyVoice-300M-SFT')
82
+ # sft usage
83
+ print(cosyvoice.list_avaliable_spks())
84
+ output = cosyvoice.inference_sft('你好,我是通义生成式语音大模型,请问有什么可以帮您的吗?', '中文女')
85
+ torchaudio.save('sft.wav', output['tts_speech'], 22050)
86
+
87
+ cosyvoice = CosyVoice('speech_tts/CosyVoice-300M')
88
+ # zero_shot usage
89
+ prompt_speech_16k = load_wav('zero_shot_prompt.wav', 16000)
90
+ output = cosyvoice.inference_zero_shot('收到好友从远方寄来的生日礼物,那份意外的惊喜与深深的祝福让我心中充满了甜蜜的快乐,笑容如花儿般绽放。', '希望你以后能够做的比我还好呦。', prompt_speech_16k)
91
+ torchaudio.save('zero_shot.wav', output['tts_speech'], 22050)
92
+ # cross_lingual usage
93
+ prompt_speech_16k = load_wav('cross_lingual_prompt.wav', 16000)
94
+ output = cosyvoice.inference_cross_lingual('<|en|>And then later on, fully acquiring that company. So keeping management in line, interest in line with the asset that\'s coming into the family is a reason why sometimes we don\'t buy the whole thing.', prompt_speech_16k)
95
+ torchaudio.save('cross_lingual.wav', output['tts_speech'], 22050)
96
+
97
+ cosyvoice = CosyVoice('speech_tts/CosyVoice-300M-Instruct')
98
+ # instruct usage
99
+ output = cosyvoice.inference_instruct('在面对挑战时,他展现了非凡的<strong>勇气</strong>与<strong>智慧</strong>。', '中文男', 'Theo \'Crimson\', is a fiery, passionate rebel leader. Fights with fervor for justice, but struggles with impulsiveness.')
100
+ torchaudio.save('instruct.wav', output['tts_speech'], 22050)
101
+ ```
102
+
103
+ **Start web demo**
104
+
105
+ You can use our web demo page to get familiar with CosyVoice quickly.
106
+ We support sft/zero_shot/cross_lingual/instruct inference in web demo.
107
+
108
+ Please see the demo website for details.
109
+
110
+ ``` sh
111
+ # change to speech_tts/CosyVoice-300M-SFT for sft inference, or to speech_tts/CosyVoice-300M-Instruct for instruct inference
112
+ python3 webui.py --port 50000 --model_dir speech_tts/CosyVoice-300M
113
+ ```
114
+
115
+ **Advanced Usage**
116
+
117
+ For advanced users, we provide training and inference scripts in `examples/libritts/cosyvoice/run.sh`.
118
+ You can get familiar with CosyVoice by following this recipe.
119
+
120
+ **Build for deployment**
121
+
122
+ Optionally, if you want to use grpc for service deployment,
123
+ you can run the following steps. Otherwise, you can just ignore this step.
124
+
125
+ ``` sh
126
+ cd runtime/python
127
+ docker build -t cosyvoice:v1.0 .
128
+ # change speech_tts/CosyVoice-300M to speech_tts/CosyVoice-300M-Instruct if you want to use instruct inference
129
+ docker run -d --runtime=nvidia -p 50000:50000 cosyvoice:v1.0 /bin/bash -c "cd /opt/CosyVoice/CosyVoice/runtime/python && python3 server.py --port 50000 --max_conc 4 --model_dir speech_tts/CosyVoice-300M && sleep infinity"
130
+ python3 client.py --port 50000 --mode <sft|zero_shot|cross_lingual|instruct>
131
+ ```
132
+
133
+ ## Discussion & Communication
134
+
135
+ You can discuss directly on [GitHub Issues](https://github.com/FunAudioLLM/CosyVoice/issues).
136
+
137
+ You can also scan the QR code to join our official Dingding chat group.
138
+
139
+ <img src="./asset/dingding.png" width="250px">
140
+
141
+ ## Acknowledge
142
+
143
+ 1. We borrowed a lot of code from [FunASR](https://github.com/modelscope/FunASR).
144
+ 2. We borrowed a lot of code from [FunCodec](https://github.com/modelscope/FunCodec).
145
+ 3. We borrowed a lot of code from [Matcha-TTS](https://github.com/shivammehta25/Matcha-TTS).
146
+ 4. We borrowed a lot of code from [AcademiCodec](https://github.com/yangdongchao/AcademiCodec).
147
+ 5. We borrowed a lot of code from [WeNet](https://github.com/wenet-e2e/wenet).
148
+
149
+ ## Disclaimer
150
+ The content provided above is for academic purposes only and is intended to demonstrate technical capabilities. Some examples are sourced from the internet. If any content infringes on your rights, please contact us to request its removal.
CosyVoice-300M-SFT/configuration.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"framework":"Pytorch","task":"text-to-speech"}
CosyVoice-300M-SFT/cosyvoice.yaml ADDED
@@ -0,0 +1,197 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # set random seed, so that you may reproduce your result.
2
+ __set_seed1: !apply:random.seed [1986]
3
+ __set_seed2: !apply:numpy.random.seed [1986]
4
+ __set_seed3: !apply:torch.manual_seed [1986]
5
+ __set_seed4: !apply:torch.cuda.manual_seed_all [1986]
6
+
7
+ # fixed params
8
+ sample_rate: 22050
9
+ text_encoder_input_size: 512
10
+ llm_input_size: 1024
11
+ llm_output_size: 1024
12
+ spk_embed_dim: 192
13
+
14
+ # model params
15
+ # for all classes/functions included in this repo, we use !<name> or !<new> for initialization, so that users can find every corresponding class/function from one single yaml.
16
+ # for system/third_party class/function, we do not require this.
17
+ llm: !new:cosyvoice.llm.llm.TransformerLM
18
+ text_encoder_input_size: !ref <text_encoder_input_size>
19
+ llm_input_size: !ref <llm_input_size>
20
+ llm_output_size: !ref <llm_output_size>
21
+ text_token_size: 51866
22
+ speech_token_size: 4096
23
+ length_normalized_loss: True
24
+ lsm_weight: 0
25
+ spk_embed_dim: !ref <spk_embed_dim>
26
+ text_encoder: !new:cosyvoice.transformer.encoder.ConformerEncoder
27
+ input_size: !ref <text_encoder_input_size>
28
+ output_size: 1024
29
+ attention_heads: 16
30
+ linear_units: 4096
31
+ num_blocks: 6
32
+ dropout_rate: 0.1
33
+ positional_dropout_rate: 0.1
34
+ attention_dropout_rate: 0
35
+ normalize_before: True
36
+ input_layer: 'linear'
37
+ pos_enc_layer_type: 'rel_pos_espnet'
38
+ selfattention_layer_type: 'rel_selfattn'
39
+ use_cnn_module: False
40
+ macaron_style: False
41
+ use_dynamic_chunk: False
42
+ use_dynamic_left_chunk: False
43
+ static_chunk_size: 1
44
+ llm: !new:cosyvoice.transformer.encoder.TransformerEncoder
45
+ input_size: !ref <llm_input_size>
46
+ output_size: !ref <llm_output_size>
47
+ attention_heads: 16
48
+ linear_units: 4096
49
+ num_blocks: 14
50
+ dropout_rate: 0.1
51
+ positional_dropout_rate: 0.1
52
+ attention_dropout_rate: 0
53
+ input_layer: 'linear_legacy'
54
+ pos_enc_layer_type: 'rel_pos_espnet'
55
+ selfattention_layer_type: 'rel_selfattn'
56
+ static_chunk_size: 1
57
+
58
+ flow: !new:cosyvoice.flow.flow.MaskedDiffWithXvec
59
+ input_size: 512
60
+ output_size: 80
61
+ spk_embed_dim: !ref <spk_embed_dim>
62
+ output_type: 'mel'
63
+ vocab_size: 4096
64
+ input_frame_rate: 50
65
+ only_mask_loss: True
66
+ encoder: !new:cosyvoice.transformer.encoder.ConformerEncoder
67
+ output_size: 512
68
+ attention_heads: 8
69
+ linear_units: 2048
70
+ num_blocks: 6
71
+ dropout_rate: 0.1
72
+ positional_dropout_rate: 0.1
73
+ attention_dropout_rate: 0.1
74
+ normalize_before: True
75
+ input_layer: 'linear'
76
+ pos_enc_layer_type: 'rel_pos_espnet'
77
+ selfattention_layer_type: 'rel_selfattn'
78
+ input_size: 512
79
+ use_cnn_module: False
80
+ macaron_style: False
81
+ length_regulator: !new:cosyvoice.flow.length_regulator.InterpolateRegulator
82
+ channels: 80
83
+ sampling_ratios: [1, 1, 1, 1]
84
+ decoder: !new:cosyvoice.flow.flow_matching.ConditionalCFM
85
+ in_channels: 240
86
+ n_spks: 1
87
+ spk_emb_dim: 80
88
+ cfm_params: !new:omegaconf.DictConfig
89
+ content:
90
+ sigma_min: 1e-06
91
+ solver: 'euler'
92
+ t_scheduler: 'cosine'
93
+ training_cfg_rate: 0.2
94
+ inference_cfg_rate: 0.7
95
+ reg_loss_type: 'l1'
96
+ estimator: !new:cosyvoice.flow.decoder.ConditionalDecoder
97
+ in_channels: 320
98
+ out_channels: 80
99
+ channels: [256, 256]
100
+ dropout: 0
101
+ attention_head_dim: 64
102
+ n_blocks: 4
103
+ num_mid_blocks: 12
104
+ num_heads: 8
105
+ act_fn: 'gelu'
106
+
107
+ hift: !new:cosyvoice.hifigan.generator.HiFTGenerator
108
+ in_channels: 80
109
+ base_channels: 512
110
+ nb_harmonics: 8
111
+ sampling_rate: !ref <sample_rate>
112
+ nsf_alpha: 0.1
113
+ nsf_sigma: 0.003
114
+ nsf_voiced_threshold: 10
115
+ upsample_rates: [8, 8]
116
+ upsample_kernel_sizes: [16, 16]
117
+ istft_params:
118
+ n_fft: 16
119
+ hop_len: 4
120
+ resblock_kernel_sizes: [3, 7, 11]
121
+ resblock_dilation_sizes: [[1, 3, 5], [1, 3, 5], [1, 3, 5]]
122
+ source_resblock_kernel_sizes: [7, 11]
123
+ source_resblock_dilation_sizes: [[1, 3, 5], [1, 3, 5]]
124
+ lrelu_slope: 0.1
125
+ audio_limit: 0.99
126
+ f0_predictor: !new:cosyvoice.hifigan.f0_predictor.ConvRNNF0Predictor
127
+ num_class: 1
128
+ in_channels: 80
129
+ cond_channels: 512
130
+
131
+ # processor functions
132
+ parquet_opener: !name:cosyvoice.dataset.processor.parquet_opener
133
+ get_tokenizer: !name:whisper.tokenizer.get_tokenizer
134
+ multilingual: True
135
+ num_languages: 100
136
+ language: 'en'
137
+ task: 'transcribe'
138
+ allowed_special: 'all'
139
+ tokenize: !name:cosyvoice.dataset.processor.tokenize
140
+ get_tokenizer: !ref <get_tokenizer>
141
+ allowed_special: !ref <allowed_special>
142
+ filter: !name:cosyvoice.dataset.processor.filter
143
+ max_length: 40960
144
+ min_length: 0
145
+ token_max_length: 200
146
+ token_min_length: 1
147
+ resample: !name:cosyvoice.dataset.processor.resample
148
+ resample_rate: !ref <sample_rate>
149
+ feat_extractor: !name:matcha.utils.audio.mel_spectrogram
150
+ n_fft: 1024
151
+ num_mels: 80
152
+ sampling_rate: !ref <sample_rate>
153
+ hop_size: 256
154
+ win_size: 1024
155
+ fmin: 0
156
+ fmax: 8000
157
+ center: False
158
+ compute_fbank: !name:cosyvoice.dataset.processor.compute_fbank
159
+ feat_extractor: !ref <feat_extractor>
160
+ parse_embedding: !name:cosyvoice.dataset.processor.parse_embedding
161
+ normalize: True
162
+ shuffle: !name:cosyvoice.dataset.processor.shuffle
163
+ shuffle_size: 1000
164
+ sort: !name:cosyvoice.dataset.processor.sort
165
+ sort_size: 500 # sort_size should be less than shuffle_size
166
+ batch: !name:cosyvoice.dataset.processor.batch
167
+ batch_type: 'dynamic'
168
+ max_frames_in_batch: 2000
169
+ padding: !name:cosyvoice.dataset.processor.padding
170
+
171
+ # dataset processor pipeline
172
+ data_pipeline: [
173
+ !ref <parquet_opener>,
174
+ !ref <tokenize>,
175
+ !ref <filter>,
176
+ !ref <resample>,
177
+ !ref <compute_fbank>,
178
+ !ref <parse_embedding>,
179
+ !ref <shuffle>,
180
+ !ref <sort>,
181
+ !ref <batch>,
182
+ !ref <padding>,
183
+ ]
184
+
185
+ # train conf
186
+ train_conf:
187
+ optim: adam
188
+ optim_conf:
189
+ lr: 0.001
190
+ scheduler: warmuplr
191
+ scheduler_conf:
192
+ warmup_steps: 2500
193
+ max_epoch: 200
194
+ grad_clip: 5
195
+ accum_grad: 2
196
+ log_interval: 100
197
+ save_per_step: -1
CosyVoice-300M/.msc ADDED
Binary file (625 Bytes). View file
 
CosyVoice-300M/.mv ADDED
@@ -0,0 +1 @@
 
 
1
+ Revision:master,CreatedAt:1720194552
CosyVoice-300M/README.md ADDED
@@ -0,0 +1,150 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # CosyVoice
2
+ ## 👉🏻 [CosyVoice Demos](https://fun-audio-llm.github.io/) 👈🏻
3
+ [[CosyVoice Paper](https://fun-audio-llm.github.io/pdf/CosyVoice_v1.pdf)][[CosyVoice Studio](https://www.modelscope.cn/studios/iic/CosyVoice-300M)][[CosyVoice Code](https://github.com/FunAudioLLM/CosyVoice)]
4
+
5
+ For `SenseVoice`, visit [SenseVoice repo](https://github.com/FunAudioLLM/SenseVoice) and [SenseVoice space](https://www.modelscope.cn/studios/iic/SenseVoice).
6
+
7
+ ## Install
8
+
9
+ **Clone and install**
10
+
11
+ - Clone the repo
12
+ ``` sh
13
+ git clone --recursive https://github.com/FunAudioLLM/CosyVoice.git
14
+ # If cloning the submodules fails due to network errors, re-run the following commands until they succeed
15
+ cd CosyVoice
16
+ git submodule update --init --recursive
17
+ ```
18
+
19
+ - Install Conda: please see https://docs.conda.io/en/latest/miniconda.html
20
+ - Create Conda env:
21
+
22
+ ``` sh
23
+ conda create -n cosyvoice python=3.8
24
+ conda activate cosyvoice
25
+ pip install -r requirements.txt -i https://mirrors.aliyun.com/pypi/simple/ --trusted-host=mirrors.aliyun.com
26
+
27
+ # If you encounter sox compatibility issues
28
+ # ubuntu
29
+ sudo apt-get install sox libsox-dev
30
+ # centos
31
+ sudo yum install sox sox-devel
32
+ ```
33
+
34
+ **Model download**
35
+
36
+ We strongly recommend that you download our pretrained `CosyVoice-300M`, `CosyVoice-300M-SFT`, and `CosyVoice-300M-Instruct` models and the `speech_kantts_ttsfrd` resource.
37
+
38
+ If you are an expert in this field and are only interested in training your own CosyVoice model from scratch, you can skip this step.
39
+
40
+ ``` python
41
+ # SDK模型下载
42
+ from modelscope import snapshot_download
43
+ snapshot_download('iic/CosyVoice-300M', local_dir='pretrained_models/CosyVoice-300M')
44
+ snapshot_download('iic/CosyVoice-300M-SFT', local_dir='pretrained_models/CosyVoice-300M-SFT')
45
+ snapshot_download('iic/CosyVoice-300M-Instruct', local_dir='pretrained_models/CosyVoice-300M-Instruct')
46
+ snapshot_download('iic/speech_kantts_ttsfrd', local_dir='pretrained_models/speech_kantts_ttsfrd')
47
+ ```
48
+
49
+ ``` sh
50
+ # git模型下载,请确保已安装git lfs
51
+ mkdir -p pretrained_models
52
+ git clone https://www.modelscope.cn/iic/CosyVoice-300M.git pretrained_models/CosyVoice-300M
53
+ git clone https://www.modelscope.cn/iic/CosyVoice-300M-SFT.git pretrained_models/CosyVoice-300M-SFT
54
+ git clone https://www.modelscope.cn/iic/CosyVoice-300M-Instruct.git pretrained_models/CosyVoice-300M-Instruct
55
+ git clone https://www.modelscope.cn/iic/speech_kantts_ttsfrd.git pretrained_models/speech_kantts_ttsfrd
56
+ ```
57
+
58
+ Unzip the `ttsfrd` resource and install the `ttsfrd` package
59
+ ``` sh
60
+ cd pretrained_models/speech_kantts_ttsfrd/
61
+ unzip resource.zip -d .
62
+ pip install ttsfrd-0.3.6-cp38-cp38-linux_x86_64.whl
63
+ ```
64
+
65
+ **Basic Usage**
66
+
67
+ For zero_shot/cross_lingual inference, please use `CosyVoice-300M` model.
68
+ For sft inference, please use `CosyVoice-300M-SFT` model.
69
+ For instruct inference, please use `CosyVoice-300M-Instruct` model.
70
+ First, add `third_party/AcademiCodec` and `third_party/Matcha-TTS` to your `PYTHONPATH`.
71
+
72
+ ``` sh
73
+ export PYTHONPATH=third_party/AcademiCodec:third_party/Matcha-TTS
74
+ ```
75
+
76
+ ``` python
77
+ from cosyvoice.cli.cosyvoice import CosyVoice
78
+ from cosyvoice.utils.file_utils import load_wav
79
+ import torchaudio
80
+
81
+ cosyvoice = CosyVoice('speech_tts/CosyVoice-300M-SFT')
82
+ # sft usage
83
+ print(cosyvoice.list_avaliable_spks())
84
+ output = cosyvoice.inference_sft('你好,我是通义生成式语音大模型,请问有什么可以帮您的吗?', '中文女')
85
+ torchaudio.save('sft.wav', output['tts_speech'], 22050)
86
+
87
+ cosyvoice = CosyVoice('speech_tts/CosyVoice-300M')
88
+ # zero_shot usage
89
+ prompt_speech_16k = load_wav('zero_shot_prompt.wav', 16000)
90
+ output = cosyvoice.inference_zero_shot('收到好友从远方寄来的生日礼物,那份意外的惊喜与深深的祝福让我心中充满了甜蜜的快乐,笑容如花儿般绽放。', '希望你以后能够做的比我还好呦。', prompt_speech_16k)
91
+ torchaudio.save('zero_shot.wav', output['tts_speech'], 22050)
92
+ # cross_lingual usage
93
+ prompt_speech_16k = load_wav('cross_lingual_prompt.wav', 16000)
94
+ output = cosyvoice.inference_cross_lingual('<|en|>And then later on, fully acquiring that company. So keeping management in line, interest in line with the asset that\'s coming into the family is a reason why sometimes we don\'t buy the whole thing.', prompt_speech_16k)
95
+ torchaudio.save('cross_lingual.wav', output['tts_speech'], 22050)
96
+
97
+ cosyvoice = CosyVoice('speech_tts/CosyVoice-300M-Instruct')
98
+ # instruct usage
99
+ output = cosyvoice.inference_instruct('在面对挑战时,他展现了非凡的<strong>勇气</strong>与<strong>智慧</strong>。', '中文男', 'Theo \'Crimson\', is a fiery, passionate rebel leader. Fights with fervor for justice, but struggles with impulsiveness.')
100
+ torchaudio.save('instruct.wav', output['tts_speech'], 22050)
101
+ ```
102
+
103
+ **Start web demo**
104
+
105
+ You can use our web demo page to get familiar with CosyVoice quickly.
106
+ We support sft/zero_shot/cross_lingual/instruct inference in web demo.
107
+
108
+ Please see the demo website for details.
109
+
110
+ ``` sh
111
+ # change to speech_tts/CosyVoice-300M-SFT for sft inference, or speech_tts/CosyVoice-300M-Instruct for instruct inference
112
+ python3 webui.py --port 50000 --model_dir speech_tts/CosyVoice-300M
113
+ ```
114
+
115
+ **Advanced Usage**
116
+
117
+ For advanced users, we provide training and inference scripts in `examples/libritts/cosyvoice/run.sh`.
118
+ You can get familiar with CosyVoice by following this recipe.
119
+
120
+ **Build for deployment**
121
+
122
+ Optionally, if you want to use grpc for service deployment,
123
+ you can run the following steps. Otherwise, you can just ignore this step.
124
+
125
+ ``` sh
126
+ cd runtime/python
127
+ docker build -t cosyvoice:v1.0 .
128
+ # change speech_tts/CosyVoice-300M to speech_tts/CosyVoice-300M-Instruct if you want to use instruct inference
129
+ docker run -d --runtime=nvidia -p 50000:50000 cosyvoice:v1.0 /bin/bash -c "cd /opt/CosyVoice/CosyVoice/runtime/python && python3 server.py --port 50000 --max_conc 4 --model_dir speech_tts/CosyVoice-300M && sleep infinity"
130
+ python3 client.py --port 50000 --mode <sft|zero_shot|cross_lingual|instruct>
131
+ ```
132
+
133
+ ## Discussion & Communication
134
+
135
+ You can directly discuss on [Github Issues](https://github.com/FunAudioLLM/CosyVoice/issues).
136
+
137
+ You can also scan the QR code to join our official Dingding chat group.
138
+
139
+ <img src="./asset/dingding.png" width="250px">
140
+
141
+ ## Acknowledge
142
+
143
+ 1. We borrowed a lot of code from [FunASR](https://github.com/modelscope/FunASR).
144
+ 2. We borrowed a lot of code from [FunCodec](https://github.com/modelscope/FunCodec).
145
+ 3. We borrowed a lot of code from [Matcha-TTS](https://github.com/shivammehta25/Matcha-TTS).
146
+ 4. We borrowed a lot of code from [AcademiCodec](https://github.com/yangdongchao/AcademiCodec).
147
+ 5. We borrowed a lot of code from [WeNet](https://github.com/wenet-e2e/wenet).
148
+
149
+ ## Disclaimer
150
+ The content provided above is for academic purposes only and is intended to demonstrate technical capabilities. Some examples are sourced from the internet. If any content infringes on your rights, please contact us to request its removal.
CosyVoice-300M/configuration.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"framework":"Pytorch","task":"text-to-speech"}
CosyVoice-300M/cosyvoice.yaml ADDED
@@ -0,0 +1,197 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # set random seed, so that you may reproduce your result.
2
+ __set_seed1: !apply:random.seed [1986]
3
+ __set_seed2: !apply:numpy.random.seed [1986]
4
+ __set_seed3: !apply:torch.manual_seed [1986]
5
+ __set_seed4: !apply:torch.cuda.manual_seed_all [1986]
6
+
7
+ # fixed params
8
+ sample_rate: 22050
9
+ text_encoder_input_size: 512
10
+ llm_input_size: 1024
11
+ llm_output_size: 1024
12
+ spk_embed_dim: 192
13
+
14
+ # model params
15
+ # for all classes/functions included in this repo, we use !<name> or !<new> for initialization, so that users can find every corresponding class/function from one single yaml.
16
+ # for system/third_party class/function, we do not require this.
17
+ llm: !new:cosyvoice.llm.llm.TransformerLM
18
+ text_encoder_input_size: !ref <text_encoder_input_size>
19
+ llm_input_size: !ref <llm_input_size>
20
+ llm_output_size: !ref <llm_output_size>
21
+ text_token_size: 51866
22
+ speech_token_size: 4096
23
+ length_normalized_loss: True
24
+ lsm_weight: 0
25
+ spk_embed_dim: !ref <spk_embed_dim>
26
+ text_encoder: !new:cosyvoice.transformer.encoder.ConformerEncoder
27
+ input_size: !ref <text_encoder_input_size>
28
+ output_size: 1024
29
+ attention_heads: 16
30
+ linear_units: 4096
31
+ num_blocks: 6
32
+ dropout_rate: 0.1
33
+ positional_dropout_rate: 0.1
34
+ attention_dropout_rate: 0
35
+ normalize_before: True
36
+ input_layer: 'linear'
37
+ pos_enc_layer_type: 'rel_pos_espnet'
38
+ selfattention_layer_type: 'rel_selfattn'
39
+ use_cnn_module: False
40
+ macaron_style: False
41
+ use_dynamic_chunk: False
42
+ use_dynamic_left_chunk: False
43
+ static_chunk_size: 1
44
+ llm: !new:cosyvoice.transformer.encoder.TransformerEncoder
45
+ input_size: !ref <llm_input_size>
46
+ output_size: !ref <llm_output_size>
47
+ attention_heads: 16
48
+ linear_units: 4096
49
+ num_blocks: 14
50
+ dropout_rate: 0.1
51
+ positional_dropout_rate: 0.1
52
+ attention_dropout_rate: 0
53
+ input_layer: 'linear_legacy'
54
+ pos_enc_layer_type: 'rel_pos_espnet'
55
+ selfattention_layer_type: 'rel_selfattn'
56
+ static_chunk_size: 1
57
+
58
+ flow: !new:cosyvoice.flow.flow.MaskedDiffWithXvec
59
+ input_size: 512
60
+ output_size: 80
61
+ spk_embed_dim: !ref <spk_embed_dim>
62
+ output_type: 'mel'
63
+ vocab_size: 4096
64
+ input_frame_rate: 50
65
+ only_mask_loss: True
66
+ encoder: !new:cosyvoice.transformer.encoder.ConformerEncoder
67
+ output_size: 512
68
+ attention_heads: 8
69
+ linear_units: 2048
70
+ num_blocks: 6
71
+ dropout_rate: 0.1
72
+ positional_dropout_rate: 0.1
73
+ attention_dropout_rate: 0.1
74
+ normalize_before: True
75
+ input_layer: 'linear'
76
+ pos_enc_layer_type: 'rel_pos_espnet'
77
+ selfattention_layer_type: 'rel_selfattn'
78
+ input_size: 512
79
+ use_cnn_module: False
80
+ macaron_style: False
81
+ length_regulator: !new:cosyvoice.flow.length_regulator.InterpolateRegulator
82
+ channels: 80
83
+ sampling_ratios: [1, 1, 1, 1]
84
+ decoder: !new:cosyvoice.flow.flow_matching.ConditionalCFM
85
+ in_channels: 240
86
+ n_spks: 1
87
+ spk_emb_dim: 80
88
+ cfm_params: !new:omegaconf.DictConfig
89
+ content:
90
+ sigma_min: 1e-06
91
+ solver: 'euler'
92
+ t_scheduler: 'cosine'
93
+ training_cfg_rate: 0.2
94
+ inference_cfg_rate: 0.7
95
+ reg_loss_type: 'l1'
96
+ estimator: !new:cosyvoice.flow.decoder.ConditionalDecoder
97
+ in_channels: 320
98
+ out_channels: 80
99
+ channels: [256, 256]
100
+ dropout: 0
101
+ attention_head_dim: 64
102
+ n_blocks: 4
103
+ num_mid_blocks: 12
104
+ num_heads: 8
105
+ act_fn: 'gelu'
106
+
107
+ hift: !new:cosyvoice.hifigan.generator.HiFTGenerator
108
+ in_channels: 80
109
+ base_channels: 512
110
+ nb_harmonics: 8
111
+ sampling_rate: !ref <sample_rate>
112
+ nsf_alpha: 0.1
113
+ nsf_sigma: 0.003
114
+ nsf_voiced_threshold: 10
115
+ upsample_rates: [8, 8]
116
+ upsample_kernel_sizes: [16, 16]
117
+ istft_params:
118
+ n_fft: 16
119
+ hop_len: 4
120
+ resblock_kernel_sizes: [3, 7, 11]
121
+ resblock_dilation_sizes: [[1, 3, 5], [1, 3, 5], [1, 3, 5]]
122
+ source_resblock_kernel_sizes: [7, 11]
123
+ source_resblock_dilation_sizes: [[1, 3, 5], [1, 3, 5]]
124
+ lrelu_slope: 0.1
125
+ audio_limit: 0.99
126
+ f0_predictor: !new:cosyvoice.hifigan.f0_predictor.ConvRNNF0Predictor
127
+ num_class: 1
128
+ in_channels: 80
129
+ cond_channels: 512
130
+
131
+ # processor functions
132
+ parquet_opener: !name:cosyvoice.dataset.processor.parquet_opener
133
+ get_tokenizer: !name:whisper.tokenizer.get_tokenizer
134
+ multilingual: True
135
+ num_languages: 100
136
+ language: 'en'
137
+ task: 'transcribe'
138
+ allowed_special: 'all'
139
+ tokenize: !name:cosyvoice.dataset.processor.tokenize
140
+ get_tokenizer: !ref <get_tokenizer>
141
+ allowed_special: !ref <allowed_special>
142
+ filter: !name:cosyvoice.dataset.processor.filter
143
+ max_length: 40960
144
+ min_length: 0
145
+ token_max_length: 200
146
+ token_min_length: 1
147
+ resample: !name:cosyvoice.dataset.processor.resample
148
+ resample_rate: !ref <sample_rate>
149
+ feat_extractor: !name:matcha.utils.audio.mel_spectrogram
150
+ n_fft: 1024
151
+ num_mels: 80
152
+ sampling_rate: !ref <sample_rate>
153
+ hop_size: 256
154
+ win_size: 1024
155
+ fmin: 0
156
+ fmax: 8000
157
+ center: False
158
+ compute_fbank: !name:cosyvoice.dataset.processor.compute_fbank
159
+ feat_extractor: !ref <feat_extractor>
160
+ parse_embedding: !name:cosyvoice.dataset.processor.parse_embedding
161
+ normalize: True
162
+ shuffle: !name:cosyvoice.dataset.processor.shuffle
163
+ shuffle_size: 1000
164
+ sort: !name:cosyvoice.dataset.processor.sort
165
+ sort_size: 500 # sort_size should be less than shuffle_size
166
+ batch: !name:cosyvoice.dataset.processor.batch
167
+ batch_type: 'dynamic'
168
+ max_frames_in_batch: 2000
169
+ padding: !name:cosyvoice.dataset.processor.padding
170
+
171
+ # dataset processor pipeline
172
+ data_pipeline: [
173
+ !ref <parquet_opener>,
174
+ !ref <tokenize>,
175
+ !ref <filter>,
176
+ !ref <resample>,
177
+ !ref <compute_fbank>,
178
+ !ref <parse_embedding>,
179
+ !ref <shuffle>,
180
+ !ref <sort>,
181
+ !ref <batch>,
182
+ !ref <padding>,
183
+ ]
184
+
185
+ # train conf
186
+ train_conf:
187
+ optim: adam
188
+ optim_conf:
189
+ lr: 0.001
190
+ scheduler: warmuplr
191
+ scheduler_conf:
192
+ warmup_steps: 2500
193
+ max_epoch: 200
194
+ grad_clip: 5
195
+ accum_grad: 2
196
+ log_interval: 100
197
+ save_per_step: -1
CosyVoice-ttsfrd/.gitattributes ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bin.* filter=lfs diff=lfs merge=lfs -text
5
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.model filter=lfs diff=lfs merge=lfs -text
12
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
13
+ *.onnx filter=lfs diff=lfs merge=lfs -text
14
+ *.ot filter=lfs diff=lfs merge=lfs -text
15
+ *.parquet filter=lfs diff=lfs merge=lfs -text
16
+ *.pb filter=lfs diff=lfs merge=lfs -text
17
+ *.pt filter=lfs diff=lfs merge=lfs -text
18
+ *.pth filter=lfs diff=lfs merge=lfs -text
19
+ *.rar filter=lfs diff=lfs merge=lfs -text
20
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
21
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
22
+ *.tflite filter=lfs diff=lfs merge=lfs -text
23
+ *.tgz filter=lfs diff=lfs merge=lfs -text
24
+ *.xz filter=lfs diff=lfs merge=lfs -text
25
+ *.zip filter=lfs diff=lfs merge=lfs -text
26
+ *.zstandard filter=lfs diff=lfs merge=lfs -text
27
+ *.tfevents* filter=lfs diff=lfs merge=lfs -text
28
+ *.db* filter=lfs diff=lfs merge=lfs -text
29
+ *.ark* filter=lfs diff=lfs merge=lfs -text
30
+ **/*ckpt*data* filter=lfs diff=lfs merge=lfs -text
31
+ **/*ckpt*.meta filter=lfs diff=lfs merge=lfs -text
32
+ **/*ckpt*.index filter=lfs diff=lfs merge=lfs -text
33
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
34
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
35
+ *.gguf* filter=lfs diff=lfs merge=lfs -text
36
+ *.ggml filter=lfs diff=lfs merge=lfs -text
37
+ *.llamafile* filter=lfs diff=lfs merge=lfs -text
38
+ resource.zip filter=lfs diff=lfs merge=lfs -text
CosyVoice-ttsfrd/README.md ADDED
@@ -0,0 +1,150 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # CosyVoice
2
+ ## 👉🏻 [CosyVoice Demos](https://fun-audio-llm.github.io/) 👈🏻
3
+ [[CosyVoice Paper](https://fun-audio-llm.github.io/pdf/CosyVoice_v1.pdf)][[CosyVoice Studio](https://www.modelscope.cn/studios/iic/CosyVoice-300M)][[CosyVoice Code](https://github.com/FunAudioLLM/CosyVoice)]
4
+
5
+ For `SenseVoice`, visit [SenseVoice repo](https://github.com/FunAudioLLM/SenseVoice) and [SenseVoice space](https://www.modelscope.cn/studios/iic/SenseVoice).
6
+
7
+ ## Install
8
+
9
+ **Clone and install**
10
+
11
+ - Clone the repo
12
+ ``` sh
13
+ git clone --recursive https://github.com/FunAudioLLM/CosyVoice.git
14
+ # If cloning the submodules fails due to network errors, re-run the following commands until they succeed
15
+ cd CosyVoice
16
+ git submodule update --init --recursive
17
+ ```
18
+
19
+ - Install Conda: please see https://docs.conda.io/en/latest/miniconda.html
20
+ - Create Conda env:
21
+
22
+ ``` sh
23
+ conda create -n cosyvoice python=3.8
24
+ conda activate cosyvoice
25
+ pip install -r requirements.txt -i https://mirrors.aliyun.com/pypi/simple/ --trusted-host=mirrors.aliyun.com
26
+
27
+ # If you encounter sox compatibility issues
28
+ # ubuntu
29
+ sudo apt-get install sox libsox-dev
30
+ # centos
31
+ sudo yum install sox sox-devel
32
+ ```
33
+
34
+ **Model download**
35
+
36
+ We strongly recommend that you download our pretrained `CosyVoice-300M`, `CosyVoice-300M-SFT`, and `CosyVoice-300M-Instruct` models and the `speech_kantts_ttsfrd` resource.
37
+
38
+ If you are an expert in this field and are only interested in training your own CosyVoice model from scratch, you can skip this step.
39
+
40
+ ``` python
41
+ # SDK模型下载
42
+ from modelscope import snapshot_download
43
+ snapshot_download('iic/CosyVoice-300M', local_dir='pretrained_models/CosyVoice-300M')
44
+ snapshot_download('iic/CosyVoice-300M-SFT', local_dir='pretrained_models/CosyVoice-300M-SFT')
45
+ snapshot_download('iic/CosyVoice-300M-Instruct', local_dir='pretrained_models/CosyVoice-300M-Instruct')
46
+ snapshot_download('iic/speech_kantts_ttsfrd', local_dir='pretrained_models/speech_kantts_ttsfrd')
47
+ ```
48
+
49
+ ``` sh
50
+ # git模型下载,请确保已安装git lfs
51
+ mkdir -p pretrained_models
52
+ git clone https://www.modelscope.cn/iic/CosyVoice-300M.git pretrained_models/CosyVoice-300M
53
+ git clone https://www.modelscope.cn/iic/CosyVoice-300M-SFT.git pretrained_models/CosyVoice-300M-SFT
54
+ git clone https://www.modelscope.cn/iic/CosyVoice-300M-Instruct.git pretrained_models/CosyVoice-300M-Instruct
55
+ git clone https://www.modelscope.cn/iic/speech_kantts_ttsfrd.git pretrained_models/speech_kantts_ttsfrd
56
+ ```
57
+
58
+ Unzip the `ttsfrd` resource and install the `ttsfrd` package
59
+ ``` sh
60
+ cd pretrained_models/speech_kantts_ttsfrd/
61
+ unzip resource.zip -d .
62
+ pip install ttsfrd-0.3.6-cp38-cp38-linux_x86_64.whl
63
+ ```
64
+
65
+ **Basic Usage**
66
+
67
+ For zero_shot/cross_lingual inference, please use `CosyVoice-300M` model.
68
+ For sft inference, please use `CosyVoice-300M-SFT` model.
69
+ For instruct inference, please use `CosyVoice-300M-Instruct` model.
70
+ First, add `third_party/AcademiCodec` and `third_party/Matcha-TTS` to your `PYTHONPATH`.
71
+
72
+ ``` sh
73
+ export PYTHONPATH=third_party/AcademiCodec:third_party/Matcha-TTS
74
+ ```
75
+
76
+ ``` python
77
+ from cosyvoice.cli.cosyvoice import CosyVoice
78
+ from cosyvoice.utils.file_utils import load_wav
79
+ import torchaudio
80
+
81
+ cosyvoice = CosyVoice('speech_tts/CosyVoice-300M-SFT')
82
+ # sft usage
83
+ print(cosyvoice.list_avaliable_spks())
84
+ output = cosyvoice.inference_sft('你好,我是通义生成式语音大模型,请问有什么可以帮您的吗?', '中文女')
85
+ torchaudio.save('sft.wav', output['tts_speech'], 22050)
86
+
87
+ cosyvoice = CosyVoice('speech_tts/CosyVoice-300M')
88
+ # zero_shot usage
89
+ prompt_speech_16k = load_wav('zero_shot_prompt.wav', 16000)
90
+ output = cosyvoice.inference_zero_shot('收到好友从远方寄来的生日礼物,那份意外的惊喜与深深的祝福让我心中充满了甜蜜的快乐,笑容如花儿般绽放。', '希望你以后能够做的比我还好呦。', prompt_speech_16k)
91
+ torchaudio.save('zero_shot.wav', output['tts_speech'], 22050)
92
+ # cross_lingual usage
93
+ prompt_speech_16k = load_wav('cross_lingual_prompt.wav', 16000)
94
+ output = cosyvoice.inference_cross_lingual('<|en|>And then later on, fully acquiring that company. So keeping management in line, interest in line with the asset that\'s coming into the family is a reason why sometimes we don\'t buy the whole thing.', prompt_speech_16k)
95
+ torchaudio.save('cross_lingual.wav', output['tts_speech'], 22050)
96
+
97
+ cosyvoice = CosyVoice('speech_tts/CosyVoice-300M-Instruct')
98
+ # instruct usage
99
+ output = cosyvoice.inference_instruct('在面对挑战时,他展现了非凡的<strong>勇气</strong>与<strong>智慧</strong>。', '中文男', 'Theo \'Crimson\', is a fiery, passionate rebel leader. Fights with fervor for justice, but struggles with impulsiveness.')
100
+ torchaudio.save('instruct.wav', output['tts_speech'], 22050)
101
+ ```
102
+
103
+ **Start web demo**
104
+
105
+ You can use our web demo page to get familiar with CosyVoice quickly.
106
+ We support sft/zero_shot/cross_lingual/instruct inference in web demo.
107
+
108
+ Please see the demo website for details.
109
+
110
+ ``` sh
111
+ # change to speech_tts/CosyVoice-300M-SFT for sft inference, or speech_tts/CosyVoice-300M-Instruct for instruct inference
112
+ python3 webui.py --port 50000 --model_dir speech_tts/CosyVoice-300M
113
+ ```
114
+
115
+ **Advanced Usage**
116
+
117
+ For advanced users, we provide training and inference scripts in `examples/libritts/cosyvoice/run.sh`.
118
+ You can get familiar with CosyVoice by following this recipe.
119
+
120
+ **Build for deployment**
121
+
122
+ Optionally, if you want to use grpc for service deployment,
123
+ you can run the following steps. Otherwise, you can just ignore this step.
124
+
125
+ ``` sh
126
+ cd runtime/python
127
+ docker build -t cosyvoice:v1.0 .
128
+ # change speech_tts/CosyVoice-300M to speech_tts/CosyVoice-300M-Instruct if you want to use instruct inference
129
+ docker run -d --runtime=nvidia -p 50000:50000 cosyvoice:v1.0 /bin/bash -c "cd /opt/CosyVoice/CosyVoice/runtime/python && python3 server.py --port 50000 --max_conc 4 --model_dir speech_tts/CosyVoice-300M && sleep infinity"
130
+ python3 client.py --port 50000 --mode <sft|zero_shot|cross_lingual|instruct>
131
+ ```
132
+
133
+ ## Discussion & Communication
134
+
135
+ You can directly discuss on [Github Issues](https://github.com/FunAudioLLM/CosyVoice/issues).
136
+
137
+ You can also scan the QR code to join our official Dingding chat group.
138
+
139
+ <img src="./asset/dingding.png" width="250px">
140
+
141
+ ## Acknowledge
142
+
143
+ 1. We borrowed a lot of code from [FunASR](https://github.com/modelscope/FunASR).
144
+ 2. We borrowed a lot of code from [FunCodec](https://github.com/modelscope/FunCodec).
145
+ 3. We borrowed a lot of code from [Matcha-TTS](https://github.com/shivammehta25/Matcha-TTS).
146
+ 4. We borrowed a lot of code from [AcademiCodec](https://github.com/yangdongchao/AcademiCodec).
147
+ 5. We borrowed a lot of code from [WeNet](https://github.com/wenet-e2e/wenet).
148
+
149
+ ## Disclaimer
150
+ The content provided above is for academic purposes only and is intended to demonstrate technical capabilities. Some examples are sourced from the internet. If any content infringes on your rights, please contact us to request its removal.
CosyVoice-ttsfrd/configuration.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"framework":"Pytorch","task":"text-to-speech"}
CosyVoice-ttsfrd/resource/festival/Singing.v0_1.dtd ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!-- <!DOCTYPE singing SYSTEM "Singing.v0_1.dtd" [ -->
2
+
3
+ <!ENTITY % baseelements "PITCH |
4
+ DURATION
5
+ ">
6
+
7
+ <!ELEMENT SINGING (#PCDATA| %baseelements; )*>
8
+ <!ATTLIST SINGING BPM CDATA "120"
9
+ MARK CDATA #IMPLIED>
10
+
11
+ <!ELEMENT PITCH ( #PCDATA | %baseelements; )*>
12
+ <!ATTLIST PITCH FREQ CDATA "X"
13
+ NOTE CDATA "X"
14
+ MARK CDATA #IMPLIED>
15
+
16
+ <!ELEMENT DURATION ( #PCDATA | %baseelements; )*>
17
+ <!ATTLIST DURATION SECONDS CDATA "X"
18
+ BEATS CDATA "X"
19
+ MARK CDATA #IMPLIED>
20
+
21
+ <!ELEMENT REST ( #PCDATA | %baseelements; )*>
22
+ <!ATTLIST REST SECONDS CDATA "X"
23
+ BEATS CDATA "X"
24
+ MARK CDATA #IMPLIED>
25
+
26
+ <!-- Character entities for latin 1 -->
27
+
28
+ <!ENTITY % ISOlat1 PUBLIC
29
+ "-//SINGING//ENTITIES Added Latin 1 for SINGING//EN"
30
+ "sable-latin.ent" >
31
+ %ISOlat1;
32
+
33
+
34
+
CosyVoice-ttsfrd/resource/festival/apml.scm ADDED
@@ -0,0 +1,551 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2
+ ;;; ;;
3
+ ;;; Centre for Speech Technology Research ;;
4
+ ;;; University of Edinburgh, UK ;;
5
+ ;;; Copyright (c) 2002 ;;
6
+ ;;; All Rights Reserved. ;;
7
+ ;;; ;;
8
+ ;;; Permission is hereby granted, free of charge, to use and distribute ;;
9
+ ;;; this software and its documentation without restriction, including ;;
10
+ ;;; without limitation the rights to use, copy, modify, merge, publish, ;;
11
+ ;;; distribute, sublicense, and/or sell copies of this work, and to ;;
12
+ ;;; permit persons to whom this work is furnished to do so, subject to ;;
13
+ ;;; the following conditions: ;;
14
+ ;;; 1. The code must retain the above copyright notice, this list of ;;
15
+ ;;; conditions and the following disclaimer. ;;
16
+ ;;; 2. Any modifications must be clearly marked as such. ;;
17
+ ;;; 3. Original authors' names are not deleted. ;;
18
+ ;;; 4. The authors' names are not used to endorse or promote products ;;
19
+ ;;; derived from this software without specific prior written ;;
20
+ ;;; permission. ;;
21
+ ;;; ;;
22
+ ;;; THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK ;;
23
+ ;;; DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ;;
24
+ ;;; ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT ;;
25
+ ;;; SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE ;;
26
+ ;;; FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES ;;
27
+ ;;; WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN ;;
28
+ ;;; AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ;;
29
+ ;;; ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF ;;
30
+ ;;; THIS SOFTWARE. ;;
31
+ ;;; ;;
32
+ ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
33
+ ;;; Author: Rob Clark
34
+ ;;; Date: July 2002
35
+ ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
36
+ ;;
37
+ ;; Sets up the current voice to synthesise from APML.
38
+ ;;
39
+ ;;
40
+
41
+ (require 'apml_f2bf0lr)
42
+ (require 'apml_kaldurtreeZ)
43
+
44
+ ;; Default pitch settings (if unspecified in current voice.)
45
+
46
+ (defvar apml_default_pitch_mean 170 )
47
+ (defvar apml_default_pitch_standard_deviation 34 )
48
+
49
+ ;; apml synthesis wrappers.
50
+
51
+ (define (apml_client_synth apml)
52
+ "(apml_client_synth apml)
53
+ Synthesise apml and return waveform(s) to client."
54
+ (utt.send.wave.client (apml_synth apml)))
55
+
56
+ (define (apml_synth apml)
57
+ "(apml_synth xml)
58
+ Synthesis an apml string."
59
+ (let ((tmpfile (make_tmp_filename))
60
+ utt)
61
+ (string_to_file tmpfile apml)
62
+ (set! utt (apml_file_synth tmpfile))
63
+ (delete-file tmpfile)
64
+ utt))
65
+
66
+ (define (apml_file_synth filename)
67
+ "(apml_file_synth filename)
68
+ Synthesis an apml file."
69
+ (let ((utt (Utterance Tokens nil)))
70
+ (utt.load utt filename)
71
+ (utt.synth utt)))
72
+
73
+ (define (string_to_file file s)
74
+ "(string_to_file file string)
75
+ Write string to file."
76
+ (let ((fd))
77
+ (set! fd (fopen file "wb"))
78
+ (format fd "%s" s)
79
+ (fclose fd)))
80
+
81
+
82
+ ;;;
83
+ ;;; Phrasing.
84
+ ;;;
85
+
86
+ ;; phrasing CART.
87
+ ;
88
+ ; It has been decided that by default, only punctuation should affect
89
+ ; phrasing (and subsequently pauses)
90
+ ;
91
+ (set! apml_phrase_tree
92
+ '
93
+ ((lisp_apml_punc in ("?" "." ":")) ; big punctuation
94
+ ((BB))
95
+ ((lisp_apml_punc in ("'" "\"" "," ";")) ; else little punctuation
96
+ ((B))
97
+ ((lisp_apml_last_word is 1)
98
+ ((BB)) ; need a BB at the end!
99
+ ((NB)))))) ; else nothing
100
+
101
+ ;; feature functions for phrasing
102
+ (define (apml_punc word)
103
+ (item.feat (item.relation.parent word 'Token) 'punc))
104
+
105
+ (define (apml_last_word word)
106
+ (if (item.next word)
107
+ "0" "1"))
108
+
109
+
110
+ ;;;
111
+ ;;; Pauses
112
+ ;;;
113
+
114
+ ;; feature functions for pauses
115
+ (define (apml_is_pause word)
116
+ (if (item.relation (item.relation.parent word 'Token) 'Pause)
117
+ t
118
+ nil))
119
+
120
+ (define (apml_pause word)
121
+ (if (item.relation word 'Pause)
122
+ (item.feat (item.relation.parent (item.relation.parent word 'Token) 'Pause) "sec")
123
+ 0))
124
+
125
+ (define (Apml_Pauses utt)
126
+ "(Pauses UTT)
127
+ Predict pause insertion for apml."
128
+ (let ((words (utt.relation.items utt 'Word)) lastword tpname)
129
+ (if words
130
+ (begin
131
+ (insert_initial_pause utt) ;; always have a start pause
132
+ (set! lastword (car (last words)))
133
+ (mapcar
134
+ (lambda (w)
135
+ (let ((pbreak (item.feat w "pbreak"))
136
+ (emph (item.feat w "R:Token.parent.EMPH")))
137
+ (cond
138
+ ((apml_is_pause w)
139
+ (insert_pause utt w))
140
+ ((or (string-equal "B" pbreak)
141
+ (string-equal "BB" pbreak))
142
+ (insert_pause utt w))
143
+ ((equal? w lastword)
144
+ (insert_pause utt w)))))
145
+ words)
146
+ ;; The embarrassing bit. Remove any words labelled as punc or fpunc
147
+ (mapcar
148
+ (lambda (w)
149
+ (let ((pos (item.feat w "pos")))
150
+ (if (or (string-equal "punc" pos)
151
+ (string-equal "fpunc" pos))
152
+ (let ((pbreak (item.feat w "pbreak"))
153
+ (wp (item.relation w 'Phrase)))
154
+ (if (and (string-matches pbreak "BB?")
155
+ (item.relation.prev w 'Word))
156
+ (item.set_feat
157
+ (item.relation.prev w 'Word) "pbreak" pbreak))
158
+ (item.relation.remove w 'Word)
159
+ ;; can't refer to w as we've just deleted it
160
+ (item.relation.remove wp 'Phrase)))))
161
+ words)))
162
+ utt))
163
+
164
+
165
+
166
+ ;;;
167
+ ;;; Intonation.
168
+ ;;;
169
+
170
+ ;; Accent prediction (well transfer really).
171
+ ;;
172
+ ;; We treat L+H* L-H% on a single syllable as a special case.
173
+
174
;; Accent CART.  Each question tests the value returned by the
;; apml_accent / apml_LHLH feature functions and the leaf is the accent
;; label to assign; NONE is the fall-through.
(set! apml_accent_cart
      '
      ((lisp_apml_accent is "Hstar")
       ((H*))
       ((lisp_apml_accent is "Lstar")
        ((L*))
        ((lisp_apml_LHLH is "LHLH")        ; combined accent + boundary case
         ((L+H*L-H%))
         ((lisp_apml_accent is "LplusHstar")
          ((L+H*))
          ((lisp_apml_accent is "LstarplusH")
           ((L*+H))
           ((NONE))))))))
187
+
188
;; Boundary-tone CART.  Each question tests the value returned by the
;; apml_boundary / apml_LHLH feature functions and the leaf is the end
;; tone label to assign; NONE is the fall-through.
(set! apml_boundary_cart
      '
      ((lisp_apml_boundary is "LL")
       ((L-L%))
       ((lisp_apml_LHLH is "LHLH")
        ((NONE))                           ; this is dealt with by the accent feature
        ((lisp_apml_boundary is "LH")
         ((L-H%))
         ((lisp_apml_boundary is "HH")
          ((H-H%))
          ((lisp_apml_boundary is "HL")
           ((H-L%))
           ((NONE))))))))
201
+
202
+ ;; feature functions.
203
;; Feature function: for a syllable whose stress is 1 and whose token
;; has a parent in the Emphasis relation, return that parent's
;; x-pitchaccent feature; otherwise return 0.
(define (apml_accent syl)
  (let ((token (item.relation.parent
                (item.relation.parent syl 'SylStructure)
                'Token)))
    (cond
     ((and (eq (item.feat syl 'stress) 1)
           (item.relation.parent token 'Emphasis))
      (item.feat (item.relation.parent token 'Emphasis) 'x-pitchaccent))
     (t 0))))
209
+
210
;; Feature function: for a syllable with syl_break > 0 whose token has
;; a parent in the Boundary relation, return that parent's type
;; feature; otherwise return 0.
(define (apml_boundary syl)
  (let ((token (item.relation.parent
                (item.relation.parent syl 'SylStructure)
                'Token)))
    (cond
     ((and (> (item.feat syl 'syl_break) 0)
           (item.relation.parent token 'Boundary))
      (item.feat (item.relation.parent token 'Boundary) 'type))
     (t 0))))
216
+
217
;; Feature function: "LHLH" when SYL carries both an LplusHstar accent
;; and an LH boundary, 0 otherwise.
(define (apml_LHLH syl)
  (let ((acc (apml_accent syl))
        (bnd (apml_boundary syl)))
    (cond
     ((and (string-equal acc "LplusHstar")
           (string-equal bnd "LH"))
      "LHLH")
     (t 0))))
224
+
225
+
226
;; Feature function: "LHLH" when SEG has ph_vc "+" and its parent
;; syllable is an LHLH syllable (see apml_LHLH), 0 otherwise.
(define (apml_seg_is_LHLH_vowel seg)
  (let ((syl (item.relation.parent seg 'SylStructure)))
    (cond
     ((and (string-equal (apml_LHLH syl) "LHLH")
           (string-equal (item.feat seg 'ph_vc) "+"))
      "LHLH")
     (t 0))))
232
+
233
+
234
+ ;;;; feature functions:
235
+
236
;; Feature function returning 0, 1, 2 or 3 for SYL:
;;   0 when the syllable's accented feature is 0,
;;   1 when apml_boundl of its word is 0,
;;   3 when apml_boundr of its word is 1,
;;   2 otherwise.
(define (apml_tgtype syl)
  (let ((word (item.relation.parent syl 'SylStructure)))
    (let ((left (apml_boundl word))
          (right (apml_boundr word)))
      (cond
       ;; this is a quirk related to the way the models were trained
       ((eq (item.feat syl 'accented) 0) 0)
       ((eq left 0) 1)
       ((eq right 1) 3)
       (t 2)))))
247
+
248
+
249
;; Feature function: 0 when SYL's accented feature is 0, otherwise its
;; asyl_in feature plus one.
(define (apml_iecount syl)
  (cond
   ;; this is a quirk related to the way the models were trained
   ((eq (item.feat syl 'accented) 0) 0)
   (t (+ (item.feat syl 'asyl_in) 1))))
253
+
254
+ ;; support functions.
255
(define (apml_boundl word)
  "(apml_boundl word)
Count the words preceding WORD, within the same performative, whose
token has a parent in the Boundary relation."
  (let ((cursor (item.prev word))
        (total 0))
    ;; walk leftwards until we run out of words or leave the performative
    (while (and cursor (apml_same_p cursor word))
      (if (item.relation.parent (item.relation.parent cursor 'Token) 'Boundary)
          (set! total (+ total 1)))
      (set! cursor (item.prev cursor)))
    total))
265
+
266
(define (apml_boundr word)
  "(apml_boundr word)
Count the words from WORD itself rightwards, within the same
performative, whose token has a parent in the Boundary relation."
  (let ((cursor word)
        (total 0))
    ;; walk rightwards until we run out of words or leave the performative
    (while (and cursor (apml_same_p cursor word))
      (if (item.relation.parent (item.relation.parent cursor 'Token) 'Boundary)
          (set! total (+ total 1)))
      (set! cursor (item.next cursor)))
    total))
276
+
277
(define (apml_same_p w1 w2)
  "(apml_same_p w1 w2)
Are these two words in the same performative?  Each word is mapped to
the SemStructure parent of its token.  When both of those items have a
parent of their own the parents are compared; otherwise the items
themselves are compared."
  (let ((p1 (item.relation.parent (item.relation.parent w1 'Token) 'SemStructure))
        ;; p2 must come from w2; it was previously derived from w1, which
        ;; made this predicate compare a word with itself.
        (p2 (item.relation.parent (item.relation.parent w2 'Token) 'SemStructure)))
    ;; p1/p2 are checked first so a word outside SemStructure is never
    ;; handed to item.parent.
    (if (and p1 p2 (item.parent p1) (item.parent p2)) ; not true if theme/rheme omitted.
        (equal? (item.parent p1) (item.parent p2))
        (equal? p1 p2))))
285
+
286
+ ;;;
287
+ ;;; segment timings
288
+ ;;;
289
+
290
(define (apml_seg_times utt)
  "(apml_seg_times utt)
Print one line per item of the Segment relation: its name followed by
its end feature.  Returns t."
  (mapcar
   (lambda (seg)
     (format t "%s %s\n" (item.name seg) (item.feat seg 'end)))
   (utt.relation.items utt 'Segment))
  t)
299
+
300
+ ;;;
301
+ ;;; Additional functions for f0model.
302
+ ;;;
303
+
304
+
305
(define (find_hstar_left syl)
  "(find_hstar_left syl)
If the closest accent or boundary to the left is H* return how many syllables away it is. Returns 0 if nearest accent is not H*"
  (let ((count 0))
    ;; Skip leftwards over syllables that carry no pitch event.  When SYL
    ;; itself carries one the loop body never runs and count stays 0, so
    ;; no separate early-exit test is needed (the former one discarded
    ;; its value and had no effect).
    (while (and syl
                (string-equal (item.feat syl 'tobi_accent) "NONE")
                (string-equal (item.feat syl 'tobi_endtone) "NONE"))
      (set! count (+ count 1))
      (set! syl (item.prev syl)))
    (cond
     ;; run out of syllables before finding accent
     ((null syl)
      0)
     ((string-equal (item.feat syl 'tobi_accent) "H*")
      count)
     (t 0))))
325
+
326
(define (find_ll_right syl)
  "(find_ll_right syl)
If the closest accent or boundary to the right is L-L% return how many syllables away it is. Returns 0 if nearest is not L-L%."
  (let ((count 0))
    ;; Skip rightwards over syllables that carry no pitch event.  When SYL
    ;; itself carries one the loop body never runs and count stays 0, so
    ;; no separate early-exit test is needed (the former one discarded
    ;; its value and had no effect).
    (while (and syl
                (string-equal (item.feat syl 'tobi_accent) "NONE")
                (string-equal (item.feat syl 'tobi_endtone) "NONE"))
      (set! count (+ count 1))
      (set! syl (item.next syl)))
    (cond
     ;; run out of syllables before finding boundary
     ((null syl)
      0)
     ((string-equal (item.feat syl 'tobi_endtone) "L-L%")
      count)
     (t 0))))
346
+
347
(define (l_spread syl)
  "(l_spread syl)
Proportion of pitch lowering required due to L- spreading backwards.
0 unless both find_hstar_left and find_ll_right are non-zero."
  (let ((left (find_hstar_left syl))
        (right (find_ll_right syl)))
    (if (or (eq left 0)
            (eq right 0))
        0
        (/ right (- (+ left right) 1)))))
358
+
359
+
360
+ ;;;
361
+ ;;; Debugging and other useful stuff.
362
+ ;;;
363
+
364
+
365
+
366
(define (apml_print_semstruct utt)
  "(apml_print_semstruct utt)
Pretty print APML semantic structure: each top-level SemStructure item
at depth 0, followed by its descendants at increasing depth."
  (let ((node (utt.relation.first utt 'SemStructure)))
    (while node
      (apml_pss_item 0 node)
      (apml_pss_daughters 1 (item.daughters node))
      (set! node (item.next node)))))
374
+
375
;; Print every item in LIST at indentation DEPTH, each one followed by
;; its own daughters at DEPTH + 1 (depth-first).
(define (apml_pss_daughters depth list)
  (mapcar
   (lambda (node)
     (apml_pss_item depth node)
     (apml_pss_daughters (+ depth 1) (item.daughters node)))
   list))
382
+
383
+
384
;; Print ITEM's name on a line of its own, preceded by DEPTH spaces.
(define (apml_pss_item depth item)
  (let ((remaining depth))
    (while (> remaining 0)
      (format t " ")
      (set! remaining (- remaining 1)))
    (format t "%s\n" (item.name item))))
390
+
391
+
392
(define (apml_print_words utt)
  "(apml_print_words utt)
Pretty print APML words with associated accents.  Each word is printed
with the features of its Emphasis, Boundary and Pause items in
parentheses.  Returns t."
  (let ((words (utt.relation.items utt 'Word)))
    (mapcar
     (lambda (word)
       (format t "%s (" (item.name word))
       (apml_pww_accent word)
       (apml_pww_boundary word)
       (apml_pww_pause word)
       (format t ")\n"))
     words)
    t))
404
+
405
;; Print the features of the Emphasis parent of ITEM's token, if any.
(define (apml_pww_accent item)
  (let ((token (item.relation.parent item 'Token)))
    (let ((emphasis (item.relation.parent token 'Emphasis)))
      (if emphasis
          (apml_ppw_list (item.features emphasis))))))
408
+
409
;; Print the features of the Boundary parent of ITEM's token, if any.
(define (apml_pww_boundary item)
  (let ((token (item.relation.parent item 'Token)))
    (let ((boundary (item.relation.parent token 'Boundary)))
      (if boundary
          (apml_ppw_list (item.features boundary))))))
412
+
413
;; Print the features of the Pause parent of ITEM's token, if any.
(define (apml_pww_pause item)
  (let ((token (item.relation.parent item 'Token)))
    (let ((pause (item.relation.parent token 'Pause)))
      (if pause
          (apml_ppw_list (item.features pause))))))
416
+
417
;; Flatten L and print each element preceded by a single space.
(define (apml_ppw_list l)
  (let ((flat (flatten l)))
    (mapcar
     (lambda (elem)
       (format t " %s" elem))
     flat)))
422
+
423
+
424
(define (apml_print_sylstructure utt filename)
  "(apml_print_sylstructure utt filename)
Pretty print APML syllable structure. Filename t for stdout.
For every word its name is written, followed by one line per syllable
(see apml_psyl).  A file opened here is closed again before returning.
Returns t."
  (let ((fd t))
    (if (not (eq? filename t))
        (set! fd (fopen filename "wb")))
    (mapcar
     (lambda (x)
       (format fd "%s\n" (item.name x))
       (apml_psyl fd x))
     (utt.relation.items utt 'Word))
    ;; Only close what we opened ourselves; previously the descriptor
    ;; was leaked, which could also leave output unflushed.
    (if (not (eq? filename t))
        (fclose fd))
    t))
437
+
438
;; Write one line to FD for each syllable of WORD: its segments (see
;; apml_psegs), a " (1)" marker when its stress is 1, and, when it has
;; daughters in the Intonation relation, their names in square brackets
;; separated by spaces.
(define (apml_psyl fd word)
  (mapcar
   (lambda (x)
     (apml_psegs fd x)
     (if (eq (item.feat x 'stress) 1)
         (format fd " (1)"))
     (if (item.relation.daughter1 x 'Intonation)
         (begin
           (let ((ie (item.relation.daughter1 x 'Intonation)))
             (format fd " [")
             (while ie
               (format fd "%s" (item.name ie))
               (set! ie (item.next ie))
               ;; separator goes to FD like everything else; it used to
               ;; be sent to stdout, corrupting file output
               (if ie (format fd " ")))
             (format fd "]"))))
     (format fd "\n"))
   (item.daughters (item.relation word 'SylStructure))))
455
+
456
;; Write a leading space to FD followed by the names of SYL's daughters
;; joined with "." characters.
(define (apml_psegs fd syl)
  (let ((remaining (item.daughters syl)))
    (format fd " ")
    (while remaining
      (format fd "%s" (item.name (car remaining)))
      (set! remaining (cdr remaining))
      ;; a dot only between segments, never after the last one
      (if remaining
          (format fd ".")))))
464
+
465
+
466
;; Return a two-element list (MEAN STD).  When the current
;; Int_Target_Method is Int_Targets_LR or Int_Targets_5_LR the values
;; are the second elements of the first two entries of int_lr_params;
;; otherwise they are apml_default_pitch_mean and
;; apml_default_pitch_standard_deviation.
(define (apml_get_lr_params)
  (let ((method (Parameter.get 'Int_Target_Method)))
    (if (or (equal? method Int_Targets_LR)
            (equal? method Int_Targets_5_LR))
        (list (car (cdr (car int_lr_params)))
              (car (cdr (car (cdr int_lr_params)))))
        (list apml_default_pitch_mean
              apml_default_pitch_standard_deviation))))
478
+
479
+
480
+
481
+
482
(define (apml_initialise)
  "(apml_initialise)
Set up the current voice for apml use.  Does nothing for voices whose
name matches multisyn, or for languages other than americanenglish,
britishenglish and italian.  Always returns nil."
  (if (not (string-matches current-voice ".*multisyn.*")) ; nothing if multisyn
      (let ((language (Parameter.get 'Language)))
        (cond
         ((or (string-equal language "americanenglish")
              (string-equal language "britishenglish"))
          (begin
            (format t "Initialising APML for English.\n")
            ;; Phrasing.
            (Parameter.set 'Phrase_Method 'cart_tree)
            (set! phrase_cart_tree apml_phrase_tree)
            ;; Pauses.
            ;;(set! duration_cart_tree apml_kal_duration_cart_tree)
            ;;(set! duration_ph_info apml_kal_durs)
            ;;(Parameter.set 'Pause_Method Apml_Pauses)
            ;; Lexicon.
            ;;;; We now assume the lexicon you have already set is suitable,
            ;;;; You probably want to ensure this is "apmlcmu" or "unilex"
            ;;(if (not (member_string "apmlcmu" (lex.list)))
            ;;    (load (path-append lexdir "apmlcmu/apmlcmulex.scm")))
            ;;(lex.select "apmlcmu")
            ;; Add other lex entries here:
            ;;(lex.add.entry '("minerals" nil (((m ih n) 1) ((er) 0) ((ax l z) 0))))
            ;;(lex.add.entry '("fibre" nil (((f ay b) 1) ((er) 0))))
            ;;(lex.add.entry '("dont" v (((d ow n t) 1))))
            ;;(lex.add.entry '("pectoris" nil (((p eh k) 2) ((t ao r) 1) ((ih s) 0))))
            ;;(lex.add.entry '("sideeffects" nil (((s ay d) 1) ((ax f) 0) ((eh k t s) 2))))

            ;; Intonation events.
            (set! int_accent_cart_tree apml_accent_cart)
            (set! int_tone_cart_tree apml_boundary_cart)
            (Parameter.set 'Int_Method Intonation_Tree)
            ;; Intonation f0 contour.
            (set! f0_lr_start apml_f2b_f0_lr_start)
            (set! f0_lr_left apml_f2b_f0_lr_left)
            (set! f0_lr_mid apml_f2b_f0_lr_mid)
            (set! f0_lr_right apml_f2b_f0_lr_right)
            (set! f0_lr_end apml_f2b_f0_lr_end)
            ;; The target mean/std are read BEFORE Int_Target_Method is
            ;; changed below, so they reflect the voice as it was set up.
            (let ((lr (apml_get_lr_params)))
              (set! int_lr_params
                    (list (list 'target_f0_mean (car lr))
                          (list 'target_f0_std (car (cdr lr)))
                          (list 'model_f0_mean 170)
                          (list 'model_f0_std 40))))
            (Parameter.set 'Int_Target_Method Int_Targets_5_LR)
            nil))
         ((string-equal language "italian")
          (begin
            (format t "Initialising APML for Italian.\n")
            ;; Phrasing.
            (Parameter.set 'Phrase_Method 'cart_tree)
            (set! phrase_cart_tree apml_phrase_tree)
            ;; Intonation events.
            (set! int_accent_cart_tree apml_accent_cart)
            (set! int_tone_cart_tree apml_boundary_cart)
            (Parameter.set 'Int_Method Intonation_Tree)
            ;; Intonation f0 contour (start/mid/end models only).
            (set! f0_lr_start apml_f2b_f0_lr_start)
            (set! f0_lr_mid apml_f2b_f0_lr_mid)
            (set! f0_lr_end apml_f2b_f0_lr_end)
            ;; As above: read the parameters before switching the method.
            (let ((lr (apml_get_lr_params)))
              (set! int_lr_params
                    (list (list 'target_f0_mean (car lr))
                          (list 'target_f0_std (car (cdr lr)))
                          (list 'model_f0_mean 170)
                          (list 'model_f0_std 34))))
            (Parameter.set 'Int_Target_Method Int_Targets_LR)
            nil))
         (t nil)))))
550
+
551
+ (provide 'apml)
CosyVoice-ttsfrd/resource/festival/apml_f2bf0lr.scm ADDED
@@ -0,0 +1,530 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2
+ ;;; ;;
3
+ ;;; Centre for Speech Technology Research ;;
4
+ ;;; University of Edinburgh, UK ;;
5
+ ;;; Copyright (c) 2002 ;;
6
+ ;;; All Rights Reserved. ;;
7
+ ;;; ;;
8
+ ;;; Permission is hereby granted, free of charge, to use and distribute ;;
9
+ ;;; this software and its documentation without restriction, including ;;
10
+ ;;; without limitation the rights to use, copy, modify, merge, publish, ;;
11
+ ;;; distribute, sublicense, and/or sell copies of this work, and to ;;
12
+ ;;; permit persons to whom this work is furnished to do so, subject to ;;
13
+ ;;; the following conditions: ;;
14
+ ;;; 1. The code must retain the above copyright notice, this list of ;;
15
+ ;;; conditions and the following disclaimer. ;;
16
+ ;;; 2. Any modifications must be clearly marked as such. ;;
17
+ ;;; 3. Original authors' names are not deleted. ;;
18
+ ;;; 4. The authors' names are not used to endorse or promote products ;;
19
+ ;;; derived from this software without specific prior written ;;
20
+ ;;; permission. ;;
21
+ ;;; ;;
22
+ ;;; THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK ;;
23
+ ;;; DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ;;
24
+ ;;; ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT ;;
25
+ ;;; SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE ;;
26
+ ;;; FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES ;;
27
+ ;;; WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN ;;
28
+ ;;; AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ;;
29
+ ;;; ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF ;;
30
+ ;;; THIS SOFTWARE. ;;
31
+ ;;; ;;
32
+ ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
33
+ ;;; Author: Rob Clark
34
+ ;;; Date: July 2002
35
+ ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
36
+ ;;
37
+ ;; APML.f0 trees.
38
+ ;;
39
+ ;;
40
+
41
+ (set! apml_f2b_f0_lr_start
42
+ '(
43
+ ( Intercept 163.9871 )
44
+ ( pp.lisp_apml_tgtype -3.1750 (1) )
45
+ ( p.lisp_apml_tgtype 5.0332 (1) )
46
+ ( lisp_apml_tgtype 0.0000 (1) )
47
+ ( n.lisp_apml_tgtype 17.7799 (1) )
48
+ ( nn.lisp_apml_tgtype 13.6845 (1) )
49
+ ( pp.lisp_apml_tgtype 0.0000 (2) )
50
+ ( p.lisp_apml_tgtype 0.0000 (2) )
51
+ ( lisp_apml_tgtype 0.0000 (2) )
52
+ ( n.lisp_apml_tgtype 0.0000 (2) )
53
+ ( nn.lisp_apml_tgtype 0.0000 (2) )
54
+ ( pp.lisp_apml_tgtype 0.0000 (3) )
55
+ ( p.lisp_apml_tgtype 0.0000 (3) )
56
+ ( lisp_apml_tgtype -9.7245 (3) )
57
+ ( n.lisp_apml_tgtype 0.0000 (3) )
58
+ ( nn.lisp_apml_tgtype -2.4009 (3) )
59
+ ( pp.lisp_apml_iecount 0.0000 )
60
+ ( p.lisp_apml_iecount -0.4484 )
61
+ ( lisp_apml_iecount 0.0000 )
62
+ ( n.lisp_apml_iecount -2.0165 )
63
+ ( nn.lisp_apml_iecount 0.0000 )
64
+ ( pp.tobi_accent 0.0000 (H*) )
65
+ ( p.tobi_accent 11.1239 (H*) )
66
+ ( tobi_accent 21.5164 (H*) )
67
+ ( n.tobi_accent -2.5990 (H*) )
68
+ ( nn.tobi_accent -6.5307 (H*) )
69
+ ( pp.tobi_accent 0.0000 (L*) )
70
+ ( p.tobi_accent -10.0000 (L*) )
71
+ ( tobi_accent -5.0000 (L*) )
72
+ ( n.tobi_accent -10.6798 (L*) )
73
+ ( nn.tobi_accent -5.6561 (L*) )
74
+ ( pp.tobi_accent 5.3577 (L*+H) )
75
+ ( p.tobi_accent 60.0000 (L*+H) )
76
+ ( tobi_accent -5.0000 (L*+H) )
77
+ ( n.tobi_accent 0.0000 (L*+H) )
78
+ ( nn.tobi_accent 0.0000 (L*+H) )
79
+ ( pp.tobi_accent 0.0000 (L+H*) )
80
+ ( p.tobi_accent 11.1200 (L+H*) )
81
+ ( tobi_accent 21.5200 (L+H*) )
82
+ ( n.tobi_accent -2.6000 (L+H*) )
83
+ ( nn.tobi_accent -6.5300 (L+H*) )
84
+ ( pp.tobi_endtone 0.0000 (L-L%) )
85
+ ( p.tobi_endtone -0.6164 (L-L%) )
86
+ ( tobi_endtone -50 (L-L%) )
87
+ ( n.tobi_endtone -10.8729 (L-L%) )
88
+ ( nn.tobi_endtone -7.6522 (L-L%) )
89
+ ( pp.tobi_endtone 0.7583 (L-H%) )
90
+ ( p.tobi_endtone 0.0000 (L-H%) )
91
+ ( tobi_endtone -20.0000 (L-H%) )
92
+ ( n.tobi_endtone -11.8935 (L-H%) )
93
+ ( nn.tobi_endtone -7.2012 (L-H%) )
94
+ ( pp.tobi_endtone 0.0000 (H-L%) )
95
+ ( p.tobi_endtone 0.0000 (H-L%) )
96
+ ( tobi_endtone 4.0790 (H-L%) )
97
+ ( n.tobi_endtone -19.3463 (H-L%) )
98
+ ( nn.tobi_endtone -29.3615 (H-L%) )
99
+ ( pp.tobi_endtone 0.0000 (H-H%) )
100
+ ( p.tobi_endtone 0.0000 (H-H%) )
101
+ ( tobi_endtone 0.0000 (H-H%) )
102
+ ( n.tobi_endtone 0.0000 (H-H%) )
103
+ ( nn.tobi_endtone 0.0000 (H-H%) )
104
+ ( pp.tobi_endtone 0.0000 (L-) )
105
+ ( p.tobi_endtone -15.1702 (L-) )
106
+ ( tobi_endtone 0.0000 (L-) )
107
+ ( n.tobi_endtone -14.5562 (L-) )
108
+ ( nn.tobi_endtone 0.0000 (L-) )
109
+ ( pp.tobi_endtone -13.5046 (H-) )
110
+ ( p.tobi_endtone 0.0000 (H-) )
111
+ ( tobi_endtone 6.3377 (H-) )
112
+ ( n.tobi_endtone -6.8631 (H-) )
113
+ ( nn.tobi_endtone 0.0000 (H-) )
114
+ ( p.tobi_accent 60.0000 (L+H*L-H%) )
115
+ ( tobi_accent -60.0000 (L+H*L-H%) )
116
+ ( n.tobi_accent 0.0000 (L+H*L-H%) )
117
+ ( pp.syl_break 0.0000 )
118
+ ( p.syl_break 0.0000 )
119
+ ( syl_break 0.6417 )
120
+ ( n.syl_break 1.3532 )
121
+ ( nn.syl_break 1.0724 )
122
+ ( pp.stress 0.0000 )
123
+ ( p.stress -0.6193 )
124
+ ( stress 2.4121 )
125
+ ( n.stress 0.0000 )
126
+ ( nn.stress 2.5478 )
127
+ ( syl_in -1.4373 )
128
+ ( syl_out 0.4181 )
129
+ ( ssyl_in 0.0000 )
130
+ ( ssyl_out 0.6125 )
131
+ ( asyl_in 0.0000 )
132
+ ( asyl_out 0.9906 )
133
+ ( last_accent 0.0000 )
134
+ ( next_accent -0.3700 )
135
+ ( sub_phrases 0.0000 )
136
+ ( lisp_l_spread -60.0000 )
137
+ ))
138
+
139
+ (set! apml_f2b_f0_lr_left
140
+ '(
141
+ ( Intercept 162.1173 )
142
+ ( pp.lisp_apml_tgtype -1.5875 (1) )
143
+ ( p.lisp_apml_tgtype 4.8101 (1) )
144
+ ( lisp_apml_tgtype 12.8265 (1) )
145
+ ( n.lisp_apml_tgtype 16.3027 (1) )
146
+ ( nn.lisp_apml_tgtype 13.3225 (1) )
147
+ ( pp.lisp_apml_tgtype 0.0000 (2) )
148
+ ( p.lisp_apml_tgtype 1.7434 (2) )
149
+ ( lisp_apml_tgtype 6.7783 (2) )
150
+ ( n.lisp_apml_tgtype 0.6679 (2) )
151
+ ( nn.lisp_apml_tgtype 0.0000 (2) )
152
+ ( pp.lisp_apml_tgtype 1.6494 (3) )
153
+ ( p.lisp_apml_tgtype 1.2861 (3) )
154
+ ( lisp_apml_tgtype -2.0724 (3) )
155
+ ( n.lisp_apml_tgtype 0.0000 (3) )
156
+ ( nn.lisp_apml_tgtype -1.2004 (3) )
157
+ ( pp.lisp_apml_iecount 0.0000 )
158
+ ( p.lisp_apml_iecount -0.5857 )
159
+ ( lisp_apml_iecount 0.0000 )
160
+ ( n.lisp_apml_iecount -2.3543 )
161
+ ( nn.lisp_apml_iecount 0.0000 )
162
+ ( pp.tobi_accent 0.0000 (H*) )
163
+ ( p.tobi_accent 8.5867 (H*) )
164
+ ( tobi_accent 21.2169 (H*) )
165
+ ( n.tobi_accent -1.2995 (H*) )
166
+ ( nn.tobi_accent -6.5056 (H*) )
167
+ ( pp.tobi_accent 0.0000 (L*) )
168
+ ( p.tobi_accent -7.5000 (L*) )
169
+ ( tobi_accent -25.0000 (L*) )
170
+ ( n.tobi_accent -8.3939 (L*) )
171
+ ( nn.tobi_accent -4.5688 (L*) )
172
+ ( pp.tobi_accent 2.6789 (L*+H) )
173
+ ( p.tobi_accent 45.0000 (L*+H) )
174
+ ( tobi_accent -17.5000 (L*+H) )
175
+ ( n.tobi_accent -1.3600 (L*+H) )
176
+ ( nn.tobi_accent 0.0000 (L*+H) )
177
+ ( pp.tobi_accent 0.0000 (L+H*) )
178
+ ( p.tobi_accent 8.5850 (L+H*) )
179
+ ( tobi_accent 21.2200 (L+H*) )
180
+ ( n.tobi_accent -1.3000 (L+H*) )
181
+ ( nn.tobi_accent -6.5050 (L+H*) )
182
+ ( pp.tobi_endtone 1.8117 (L-L%) )
183
+ ( p.tobi_endtone -0.1681 (L-L%) )
184
+ ( tobi_endtone -70 (L-L%) )
185
+ ( n.tobi_endtone -8.9334 (L-L%) )
186
+ ( nn.tobi_endtone -8.4034 (L-L%) )
187
+ ( pp.tobi_endtone 1.2099 (L-H%) )
188
+ ( p.tobi_endtone 1.1220 (L-H%) )
189
+ ( tobi_endtone -10.0000 (L-H%) )
190
+ ( n.tobi_endtone -5.9467 (L-H%) )
191
+ ( nn.tobi_endtone -6.9072 (L-H%) )
192
+ ( pp.tobi_endtone 0.0000 (H-L%) )
193
+ ( p.tobi_endtone 0.0000 (H-L%) )
194
+ ( tobi_endtone 2.0395 (H-L%) )
195
+ ( n.tobi_endtone -12.3940 (H-L%) )
196
+ ( nn.tobi_endtone -24.2593 (H-L%) )
197
+ ( pp.tobi_endtone 0.0000 (H-H%) )
198
+ ( p.tobi_endtone 0.0000 (H-H%) )
199
+ ( tobi_endtone 0.0000 (H-H%) )
200
+ ( n.tobi_endtone 0.0000 (H-H%) )
201
+ ( nn.tobi_endtone 16.1076 (H-H%) )
202
+ ( pp.tobi_endtone -1.8913 (L-) )
203
+ ( p.tobi_endtone -15.5650 (L-) )
204
+ ( tobi_endtone -18.3620 (L-) )
205
+ ( n.tobi_endtone -9.8322 (L-) )
206
+ ( nn.tobi_endtone -1.8182 (L-) )
207
+ ( pp.tobi_endtone -13.4429 (H-) )
208
+ ( p.tobi_endtone 0.0000 (H-) )
209
+ ( tobi_endtone 1.9053 (H-) )
210
+ ( n.tobi_endtone -3.4315 (H-) )
211
+ ( nn.tobi_endtone 0.0000 (H-) )
212
+ ( p.tobi_accent 0.0000 (L+H*L-H%) )
213
+ ( tobi_accent 10.0000 (L+H*L-H%) )
214
+ ( n.tobi_accent 0.0000 (L+H*L-H%) )
215
+ ( pp.syl_break 0.3501 )
216
+ ( p.syl_break -0.8121 )
217
+ ( syl_break 0.3209 )
218
+ ( n.syl_break 0.7486 )
219
+ ( nn.syl_break 0.8182 )
220
+ ( pp.stress -0.9778 )
221
+ ( p.stress -0.3096 )
222
+ ( stress 2.7752 )
223
+ ( n.stress 0.9976 )
224
+ ( nn.stress 2.7343 )
225
+ ( syl_in -1.9845 )
226
+ ( syl_out 0.7142 )
227
+ ( ssyl_in 1.0376 )
228
+ ( ssyl_out 0.3062 )
229
+ ( asyl_in 0.0000 )
230
+ ( asyl_out 0.4953 )
231
+ ( last_accent 0.0000 )
232
+ ( next_accent 0.1084 )
233
+ ( sub_phrases 0.0000 )
234
+ ( lisp_l_spread -60.0000 )
235
+ ))
236
+
237
+ (set! apml_f2b_f0_lr_mid
238
+ '(
239
+ ( Intercept 160.2474 )
240
+ ( pp.lisp_apml_tgtype 0.0000 (1) )
241
+ ( p.lisp_apml_tgtype 4.5869 (1) )
242
+ ( lisp_apml_tgtype 25.6530 (1) )
243
+ ( n.lisp_apml_tgtype 14.8255 (1) )
244
+ ( nn.lisp_apml_tgtype 12.9605 (1) )
245
+ ( pp.lisp_apml_tgtype 0.0000 (2) )
246
+ ( p.lisp_apml_tgtype 3.4867 (2) )
247
+ ( lisp_apml_tgtype 13.5566 (2) )
248
+ ( n.lisp_apml_tgtype 1.3359 (2) )
249
+ ( nn.lisp_apml_tgtype 0.0000 (2) )
250
+ ( pp.lisp_apml_tgtype 3.2989 (3) )
251
+ ( p.lisp_apml_tgtype 2.5723 (3) )
252
+ ( lisp_apml_tgtype 5.5798 (3) )
253
+ ( n.lisp_apml_tgtype 0.0000 (3) )
254
+ ( nn.lisp_apml_tgtype 0.0000 (3) )
255
+ ( pp.lisp_apml_iecount 0.0000 )
256
+ ( p.lisp_apml_iecount -0.7231 )
257
+ ( lisp_apml_iecount 0.0000 )
258
+ ( n.lisp_apml_iecount -2.6922 )
259
+ ( nn.lisp_apml_iecount 0.0000 )
260
+ ( pp.tobi_accent 0.0000 (H*) )
261
+ ( p.tobi_accent 6.0496 (H*) )
262
+ ( tobi_accent 20.9174 (H*) )
263
+ ( n.tobi_accent 0.0000 (H*) )
264
+ ( nn.tobi_accent -6.4804 (H*) )
265
+ ( pp.tobi_accent 0.0000 (L*) )
266
+ ( p.tobi_accent -5.0000 (L*) )
267
+ ( tobi_accent -45.0000 (L*) )
268
+ ( n.tobi_accent -6.1079 (L*) )
269
+ ( nn.tobi_accent -3.4815 (L*) )
270
+ ( pp.tobi_accent 0.0000 (L*+H) )
271
+ ( p.tobi_accent 30.0000 (L*+H) )
272
+ ( tobi_accent -30.0000 (L*+H) )
273
+ ( n.tobi_accent -2.7200 (L*+H) )
274
+ ( nn.tobi_accent 0.0000 (L*+H) )
275
+ ( pp.tobi_accent 0.0000 (L+H*) )
276
+ ( p.tobi_accent 6.0500 (L+H*) )
277
+ ( tobi_accent 20.9200 (L+H*) )
278
+ ( n.tobi_accent 0.0000 (L+H*) )
279
+ ( nn.tobi_accent -6.4800 (L+H*) )
280
+ ( pp.tobi_endtone 3.6235 (L-L%) )
281
+ ( p.tobi_endtone 0.2801 (L-L%) )
282
+ ( tobi_endtone -80 (L-L%) )
283
+ ( n.tobi_endtone -6.9938 (L-L%) )
284
+ ( nn.tobi_endtone -9.1546 (L-L%) )
285
+ ( pp.tobi_endtone 1.6616 (L-H%) )
286
+ ( p.tobi_endtone 2.2441 (L-H%) )
287
+ ( tobi_endtone 0.0000 (L-H%) )
288
+ ( n.tobi_endtone 0.0000 (L-H%) )
289
+ ( nn.tobi_endtone -6.6132 (L-H%) )
290
+ ( pp.tobi_endtone 0.0000 (H-L%) )
291
+ ( p.tobi_endtone 0.0000 (H-L%) )
292
+ ( tobi_endtone 0.0000 (H-L%) )
293
+ ( n.tobi_endtone -5.4416 (H-L%) )
294
+ ( nn.tobi_endtone -19.1570 (H-L%) )
295
+ ( pp.tobi_endtone 0.0000 (H-H%) )
296
+ ( p.tobi_endtone 0.0000 (H-H%) )
297
+ ( tobi_endtone 0.0000 (H-H%) )
298
+ ( n.tobi_endtone 0.0000 (H-H%) )
299
+ ( nn.tobi_endtone 32.2151 (H-H%) )
300
+ ( pp.tobi_endtone -3.7825 (L-) )
301
+ ( p.tobi_endtone -15.9598 (L-) )
302
+ ( tobi_endtone -36.7241 (L-) )
303
+ ( n.tobi_endtone -5.1082 (L-) )
304
+ ( nn.tobi_endtone -3.6363 (L-) )
305
+ ( pp.tobi_endtone -13.3813 (H-) )
306
+ ( p.tobi_endtone 0.0000 (H-) )
307
+ ( tobi_endtone -2.5270 (H-) )
308
+ ( n.tobi_endtone 0.0000 (H-) )
309
+ ( nn.tobi_endtone 0.0000 (H-) )
310
+ ( p.tobi_accent 0.0000 (L+H*L-H%) )
311
+ ( tobi_accent 40.0000 (L+H*L-H%) )
312
+ ( n.tobi_accent 0.0000 (L+H*L-H%) )
313
+ ( pp.syl_break 0.7003 )
314
+ ( p.syl_break -1.6241 )
315
+ ( syl_break 0.0000 )
316
+ ( n.syl_break 0.1439 )
317
+ ( nn.syl_break 0.5640 )
318
+ ( pp.stress -1.9556 )
319
+ ( p.stress 0.0000 )
320
+ ( stress 3.1383 )
321
+ ( n.stress 1.9952 )
322
+ ( nn.stress 2.9208 )
323
+ ( syl_in -2.5317 )
324
+ ( syl_out 1.0103 )
325
+ ( ssyl_in 2.0751 )
326
+ ( ssyl_out 0.0000 )
327
+ ( asyl_in 0.0000 )
328
+ ( asyl_out 0.0000 )
329
+ ( last_accent 0.0000 )
330
+ ( next_accent 0.5869 )
331
+ ( sub_phrases 0.0000 )
332
+ ( lisp_l_spread -60.0000 )
333
+ ))
334
+
335
+ (set! apml_f2b_f0_lr_right
336
+ '(
337
+ ( Intercept 162.6687 )
338
+ ( pp.lisp_apml_tgtype -4.0459 (1) )
339
+ ( p.lisp_apml_tgtype 3.0601 (1) )
340
+ ( lisp_apml_tgtype 27.8166 (1) )
341
+ ( n.lisp_apml_tgtype 7.4127 (1) )
342
+ ( nn.lisp_apml_tgtype 11.3458 (1) )
343
+ ( pp.lisp_apml_tgtype -3.8091 (2) )
344
+ ( p.lisp_apml_tgtype 1.7434 (2) )
345
+ ( lisp_apml_tgtype 17.1672 (2) )
346
+ ( n.lisp_apml_tgtype 0.6679 (2) )
347
+ ( nn.lisp_apml_tgtype 0.0000 (2) )
348
+ ( pp.lisp_apml_tgtype 1.6494 (3) )
349
+ ( p.lisp_apml_tgtype 1.2861 (3) )
350
+ ( lisp_apml_tgtype 9.5674 (3) )
351
+ ( n.lisp_apml_tgtype -3.1085 (3) )
352
+ ( nn.lisp_apml_tgtype 0.0000 (3) )
353
+ ( pp.lisp_apml_iecount 0.0000 )
354
+ ( p.lisp_apml_iecount -0.7829 )
355
+ ( lisp_apml_iecount -0.5447 )
356
+ ( n.lisp_apml_iecount -1.3461 )
357
+ ( nn.lisp_apml_iecount -0.7178 )
358
+ ( pp.tobi_accent 0.7904 (H*) )
359
+ ( p.tobi_accent 3.0248 (H*) )
360
+ ( tobi_accent 14.1116 (H*) )
361
+ ( n.tobi_accent 0.0000 (H*) )
362
+ ( nn.tobi_accent -3.2402 (H*) )
363
+ ( pp.tobi_accent 0.0000 (L*) )
364
+ ( p.tobi_accent -2.5000 (L*) )
365
+ ( tobi_accent -32.5000 (L*) )
366
+ ( n.tobi_accent -3.0539 (L*) )
367
+ ( nn.tobi_accent -1.7408 (L*) )
368
+ ( pp.tobi_accent 0.0000 (L*+H) )
369
+ ( p.tobi_accent 17.5000 (L*+H) )
370
+ ( tobi_accent -9.0000 (L*+H) )
371
+ ( n.tobi_accent -2.8025 (L*+H) )
372
+ ( nn.tobi_accent -0.5455 (L*+H) )
373
+ ( pp.tobi_accent 0.7900 (L+H*) )
374
+ ( p.tobi_accent 3.0250 (L+H*) )
375
+ ( tobi_accent 14.1150 (L+H*) )
376
+ ( n.tobi_accent 0.0000 (L+H*) )
377
+ ( nn.tobi_accent -3.2400 (L+H*) )
378
+ ( pp.tobi_endtone 5.7534 (L-L%) )
379
+ ( p.tobi_endtone 0.1401 (L-L%) )
380
+ ( tobi_endtone -65 (L-L%) )
381
+ ( n.tobi_endtone -11.1795 (L-L%) )
382
+ ( nn.tobi_endtone -7.8158 (L-L%) )
383
+ ( pp.tobi_endtone 4.4276 (L-H%) )
384
+ ( p.tobi_endtone 1.1220 (L-H%) )
385
+ ( tobi_endtone 20.0000 (L-H%) )
386
+ ( n.tobi_endtone -6.8995 (L-H%) )
387
+ ( nn.tobi_endtone -6.1219 (L-H%) )
388
+ ( pp.tobi_endtone 2.4327 (H-L%) )
389
+ ( p.tobi_endtone 0.0000 (H-L%) )
390
+ ( tobi_endtone -7.5781 (H-L%) )
391
+ ( n.tobi_endtone -2.7208 (H-L%) )
392
+ ( nn.tobi_endtone -14.4838 (H-L%) )
393
+ ( pp.tobi_endtone 0.0000 (H-H%) )
394
+ ( p.tobi_endtone 0.0000 (H-H%) )
395
+ ( tobi_endtone 0.0000 (H-H%) )
396
+ ( n.tobi_endtone 0.0000 (H-H%) )
397
+ ( nn.tobi_endtone 16.1076 (H-H%) )
398
+ ( pp.tobi_endtone -1.8913 (L-) )
399
+ ( p.tobi_endtone -15.5651 (L-) )
400
+ ( tobi_endtone -40.2021 (L-) )
401
+ ( n.tobi_endtone -2.5541 (L-) )
402
+ ( nn.tobi_endtone -2.2224 (L-) )
403
+ ( pp.tobi_endtone -6.6906 (H-) )
404
+ ( p.tobi_endtone -3.5483 (H-) )
405
+ ( tobi_endtone -1.2635 (H-) )
406
+ ( n.tobi_endtone 0.0000 (H-) )
407
+ ( nn.tobi_endtone 0.0000 (H-) )
408
+ ( p.tobi_accent 0.0000 (L+H*L-H%) )
409
+ ( tobi_accent -40.0000 (L+H*L-H%) )
410
+ ( n.tobi_accent 0.0000 (L+H*L-H%) )
411
+ ( pp.syl_break 0.3501 )
412
+ ( p.syl_break -1.0003 )
413
+ ( syl_break -1.5536 )
414
+ ( n.syl_break 0.0720 )
415
+ ( nn.syl_break 0.5989 )
416
+ ( pp.stress -0.9778 )
417
+ ( p.stress -0.8046 )
418
+ ( stress 1.2124 )
419
+ ( n.stress 3.9715 )
420
+ ( nn.stress 2.3914 )
421
+ ( syl_in -2.3468 )
422
+ ( syl_out 0.9792 )
423
+ ( ssyl_in 2.0463 )
424
+ ( ssyl_out 0.0000 )
425
+ ( asyl_in -0.1460 )
426
+ ( asyl_out 0.0000 )
427
+ ( last_accent -1.0992 )
428
+ ( next_accent 0.2935 )
429
+ ( sub_phrases 0.0000 )
430
+ ( lisp_l_spread -60.0000 )
431
+ ))
432
+
433
+ (set! apml_f2b_f0_lr_end
434
+ '(
435
+ ( Intercept 165.0901 )
436
+ ( pp.lisp_apml_tgtype -8.0918 (1) )
437
+ ( p.lisp_apml_tgtype 1.5332 (1) )
438
+ ( lisp_apml_tgtype 29.9802 (1) )
439
+ ( n.lisp_apml_tgtype 0.0000 (1) )
440
+ ( nn.lisp_apml_tgtype 9.7312 (1) )
441
+ ( pp.lisp_apml_tgtype -7.6181 (2) )
442
+ ( p.lisp_apml_tgtype 0.0000 (2) )
443
+ ( lisp_apml_tgtype 20.7778 (2) )
444
+ ( n.lisp_apml_tgtype 0.0000 (2) )
445
+ ( nn.lisp_apml_tgtype 0.0000 (2) )
446
+ ( pp.lisp_apml_tgtype 0.0000 (3) )
447
+ ( p.lisp_apml_tgtype 0.0000 (3) )
448
+ ( lisp_apml_tgtype 13.5550 (3) )
449
+ ( n.lisp_apml_tgtype -6.2170 (3) )
450
+ ( nn.lisp_apml_tgtype 0.0000 (3) )
451
+ ( pp.lisp_apml_iecount 0.0000 )
452
+ ( p.lisp_apml_iecount -0.8428 )
453
+ ( lisp_apml_iecount -1.0894 )
454
+ ( n.lisp_apml_iecount 0.0000 )
455
+ ( nn.lisp_apml_iecount -1.4355 )
456
+ ( pp.tobi_accent 1.5807 (H*) )
457
+ ( p.tobi_accent 0.0000 (H*) )
458
+ ( tobi_accent 7.3057 (H*) )
459
+ ( n.tobi_accent 0.0000 (H*) )
460
+ ( nn.tobi_accent 0.0000 (H*) )
461
+ ( pp.tobi_accent 0.0000 (L*) )
462
+ ( p.tobi_accent 0.0000 (L*) )
463
+ ( tobi_accent -20.0000 (L*) )
464
+ ( n.tobi_accent 0.0000 (L*) )
465
+ ( nn.tobi_accent 0.0000 (L*) )
466
+ ( pp.tobi_accent 0.0000 (L*+H) )
467
+ ( p.tobi_accent 5.0000 (L*+H) )
468
+ ( tobi_accent 12.0000 (L*+H) )
469
+ ( n.tobi_accent -2.8850 (L*+H) )
470
+ ( nn.tobi_accent -1.0910 (L*+H) )
471
+ ( pp.tobi_accent 1.5800 (L+H*) )
472
+ ( p.tobi_accent 0.0000 (L+H*) )
473
+ ( tobi_accent 7.3100 (L+H*) )
474
+ ( n.tobi_accent 0.0000 (L+H*) )
475
+ ( nn.tobi_accent 0.0000 (L+H*) )
476
+ ( pp.tobi_endtone 7.8833 (L-L%) )
477
+ ( p.tobi_endtone 0.0000 (L-L%) )
478
+ ( tobi_endtone -80 (L-L%) )
479
+ ( n.tobi_endtone -35 (L-L%) )
480
+ ( nn.tobi_endtone -6.4769 (L-L%) )
481
+ ( pp.tobi_endtone 7.1936 (L-H%) )
482
+ ( p.tobi_endtone 0.0000 (L-H%) )
483
+ ( tobi_endtone 40.0000 (L-H%) )
484
+ ( n.tobi_endtone -13.7990 (L-H%) )
485
+ ( nn.tobi_endtone -5.6305 (L-H%) )
486
+ ( pp.tobi_endtone 4.8654 (H-L%) )
487
+ ( p.tobi_endtone 0.0000 (H-L%) )
488
+ ( tobi_endtone -15.1561 (H-L%) )
489
+ ( n.tobi_endtone 0.0000 (H-L%) )
490
+ ( nn.tobi_endtone -9.8107 (H-L%) )
491
+ ( pp.tobi_endtone 0.0000 (H-H%) )
492
+ ( p.tobi_endtone 0.0000 (H-H%) )
493
+ ( tobi_endtone 0.0000 (H-H%) )
494
+ ( n.tobi_endtone 0.0000 (H-H%) )
495
+ ( nn.tobi_endtone 0.0000 (H-H%) )
496
+ ( pp.tobi_endtone 0.0000 (L-) )
497
+ ( p.tobi_endtone -15.1705 (L-) )
498
+ ( tobi_endtone -43.6801 (L-) )
499
+ ( n.tobi_endtone 0.0000 (L-) )
500
+ ( nn.tobi_endtone -0.8085 (L-) )
501
+ ( pp.tobi_endtone 0.0000 (H-) )
502
+ ( p.tobi_endtone -7.0967 (H-) )
503
+ ( tobi_endtone 0.0000 (H-) )
504
+ ( n.tobi_endtone 0.0000 (H-) )
505
+ ( nn.tobi_endtone 0.0000 (H-) )
506
+ ( p.tobi_accent 0.0000 (L+H*L-H%) )
507
+ ( tobi_accent 60.0000 (L+H*L-H%) )
508
+ ( n.tobi_accent -60.0000 (L+H*L-H%) )
509
+ ( pp.syl_break 0.0000 )
510
+ ( p.syl_break -0.3765 )
511
+ ( syl_break -3.1072 )
512
+ ( n.syl_break 0.0000 )
513
+ ( nn.syl_break 0.6338 )
514
+ ( pp.stress 0.0000 )
515
+ ( p.stress -1.6093 )
516
+ ( stress -0.7136 )
517
+ ( n.stress 5.9479 )
518
+ ( nn.stress 1.8619 )
519
+ ( syl_in -2.1619 )
520
+ ( syl_out 0.9481 )
521
+ ( ssyl_in 2.0175 )
522
+ ( ssyl_out 0.0000 )
523
+ ( asyl_in -0.2919 )
524
+ ( asyl_out 0.0000 )
525
+ ( last_accent -2.1984 )
526
+ ( next_accent 0.0000 )
527
+ ( sub_phrases 0.0000 )
528
+ ( lisp_l_spread -60.0000 )
529
+ ))
530
+
CosyVoice-ttsfrd/resource/festival/apml_kaldurtreeZ.scm ADDED
@@ -0,0 +1,996 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2
+ ;;; ;;
3
+ ;;; Centre for Speech Technology Research ;;
4
+ ;;; University of Edinburgh, UK ;;
5
+ ;;; Copyright (c) 1996,1997 ;;
6
+ ;;; All Rights Reserved. ;;
7
+ ;;; ;;
8
+ ;;; Permission is hereby granted, free of charge, to use and distribute ;;
9
+ ;;; this software and its documentation without restriction, including ;;
10
+ ;;; without limitation the rights to use, copy, modify, merge, publish, ;;
11
+ ;;; distribute, sublicense, and/or sell copies of this work, and to ;;
12
+ ;;; permit persons to whom this work is furnished to do so, subject to ;;
13
+ ;;; the following conditions: ;;
14
+ ;;; 1. The code must retain the above copyright notice, this list of ;;
15
+ ;;; conditions and the following disclaimer. ;;
16
+ ;;; 2. Any modifications must be clearly marked as such. ;;
17
+ ;;; 3. Original authors' names are not deleted. ;;
18
+ ;;; 4. The authors' names are not used to endorse or promote products ;;
19
+ ;;; derived from this software without specific prior written ;;
20
+ ;;; permission. ;;
21
+ ;;; ;;
22
+ ;;; THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK ;;
23
+ ;;; DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ;;
24
+ ;;; ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT ;;
25
+ ;;; SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE ;;
26
+ ;;; FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES ;;
27
+ ;;; WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN ;;
28
+ ;;; AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ;;
29
+ ;;; ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF ;;
30
+ ;;; THIS SOFTWARE. ;;
31
+ ;;; ;;
32
+ ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
33
+ ;;;
34
+ ;;; A tree to predict z-score durations built from f2b
35
+ ;;; doesn't use actual phonemes so it can have better generalizations
36
+ ;;;
37
+ ;;; Basically copied from ked
38
+ ;;;
39
+
40
+ (set! apml_kal_durs
41
+ '(
42
+ (uh 0.067 0.025)
43
+ (hh 0.061 0.028)
44
+ (ao 0.138 0.046)
45
+ (hv 0.053 0.020)
46
+ (v 0.051 0.019)
47
+ (ih 0.058 0.023)
48
+ (el 0.111 0.043)
49
+ (ey 0.132 0.042)
50
+ (em 0.080 0.033)
51
+ (jh 0.094 0.024)
52
+ (w 0.054 0.023)
53
+ (uw 0.107 0.044)
54
+ (ae 0.120 0.036)
55
+ (en 0.117 0.056)
56
+ (k 0.089 0.034)
57
+ (y 0.048 0.025)
58
+ (axr 0.147 0.035)
59
+ ; (l 0.056 0.026)
60
+ (l 0.066 0.026)
61
+ (ng 0.064 0.024)
62
+ (zh 0.071 0.030)
63
+ (z 0.079 0.034)
64
+ (brth 0.246 0.046)
65
+ (m 0.069 0.028)
66
+ (iy 0.097 0.041)
67
+ (n 0.059 0.025)
68
+ (ah 0.087 0.031)
69
+ (er 0.086 0.010)
70
+ (b 0.069 0.024)
71
+ (pau 0.200 0.1)
72
+ (aw 0.166 0.053)
73
+ (p 0.088 0.030)
74
+ (ch 0.115 0.025)
75
+ (ow 0.134 0.039)
76
+ (dh 0.031 0.016)
77
+ (nx 0.049 0.100)
78
+ (d 0.048 0.021)
79
+ (ax 0.046 0.024)
80
+ (h# 0.060 0.083)
81
+ (r 0.053 0.031)
82
+ (eh 0.095 0.036)
83
+ (ay 0.137 0.047)
84
+ (oy 0.183 0.050)
85
+ (f 0.095 0.033)
86
+ (sh 0.108 0.031)
87
+ (s 0.102 0.037)
88
+ (g 0.064 0.021)
89
+ (dx 0.031 0.016)
90
+ (th 0.093 0.050)
91
+ (aa 0.094 0.037)
92
+ (t 0.070 0.020)
93
+ )
94
+ )
95
+
96
+ (set! apml_kal_duration_cart_tree
97
+ '
98
+ ((name is pau)
99
+ ((emph_sil is +)
100
+ ((0.0 -0.5))
101
+ ((p.R:SylStructure.parent.parent.lisp_apml_pause = 0.2)
102
+ ((0.0 0.0))
103
+ ((p.R:SylStructure.parent.parent.lisp_apml_pause = 0.4)
104
+ ((0.0 2.0))
105
+ ((p.R:SylStructure.parent.parent.lisp_apml_pause = 0.6)
106
+ ((0.0 4.0))
107
+ ((p.R:SylStructure.parent.parent.lisp_apml_pause = 0.8)
108
+ ((0.0 6.0))
109
+ ((p.R:SylStructure.parent.parent.lisp_apml_pause = 1.0)
110
+ ((0.0 8.0))
111
+ ((p.R:SylStructure.parent.parent.lisp_apml_pause = 1.5)
112
+ ((0.0 13.0))
113
+ ((p.R:SylStructure.parent.parent.lisp_apml_pause = 2.0)
114
+ ((0.0 18.0))
115
+ ((p.R:SylStructure.parent.parent.lisp_apml_pause = 2.5)
116
+ ((0.0 23.0))
117
+ ((p.R:SylStructure.parent.parent.lisp_apml_pause = 3.0)
118
+ ((0.0 28.0))
119
+ ((p.R:SylStructure.parent.parent.pbreak is BB)
120
+ ((0.0 2.0))
121
+ ((0.0 0.0)))))))))))))
122
+ ((R:SylStructure.parent.accented is 0)
123
+ ((n.ph_ctype is 0)
124
+ ((p.ph_vlng is 0)
125
+ ((R:SylStructure.parent.syl_codasize < 1.5)
126
+ ((p.ph_ctype is n)
127
+ ((ph_ctype is f)
128
+ ((0.559208 -0.783163))
129
+ ((1.05215 -0.222704)))
130
+ ((ph_ctype is s)
131
+ ((R:SylStructure.parent.syl_break is 2)
132
+ ((0.589948 0.764459))
133
+ ((R:SylStructure.parent.asyl_in < 0.7)
134
+ ((1.06385 0.567944))
135
+ ((0.691943 0.0530272))))
136
+ ((ph_vlng is l)
137
+ ((pp.ph_vfront is 1)
138
+ ((1.06991 0.766486))
139
+ ((R:SylStructure.parent.syl_break is 1)
140
+ ((0.69665 0.279248))
141
+ ((0.670353 0.0567774))))
142
+ ((p.ph_ctype is s)
143
+ ((seg_onsetcoda is coda)
144
+ ((0.828638 -0.038356))
145
+ ((ph_ctype is f)
146
+ ((0.7631 -0.545853))
147
+ ((0.49329 -0.765994))))
148
+ ((R:SylStructure.parent.parent.gpos is det)
149
+ ((R:SylStructure.parent.last_accent < 0.3)
150
+ ((R:SylStructure.parent.sub_phrases < 1)
151
+ ((0.811686 0.160195))
152
+ ((0.799015 0.713958)))
153
+ ((0.731599 -0.215472)))
154
+ ((ph_ctype is r)
155
+ ((0.673487 0.092772))
156
+ ((R:SylStructure.parent.asyl_in < 1)
157
+ ((0.745273 0.00132813))
158
+ ((0.75457 -0.334898)))))))))
159
+ ((pos_in_syl < 0.5)
160
+ ((R:SylStructure.parent.R:Syllable.p.syl_break is 2)
161
+ ((R:SylStructure.parent.R:Syllable.n.syl_onsetsize < 0.2)
162
+ ((0.902446 -0.041618))
163
+ ((R:SylStructure.parent.sub_phrases < 2.3)
164
+ ((0.900629 0.262952))
165
+ ((1.18474 0.594794))))
166
+ ((seg_onset_stop is 0)
167
+ ((R:SylStructure.parent.position_type is mid)
168
+ ((0.512323 -0.760444))
169
+ ((R:SylStructure.parent.syl_out < 6.8)
170
+ ((pp.ph_vlng is a)
171
+ ((0.640575 -0.450449))
172
+ ((ph_ctype is f)
173
+ ((R:SylStructure.parent.sub_phrases < 1.3)
174
+ ((0.862876 -0.296956))
175
+ ((R:SylStructure.parent.syl_out < 2.4)
176
+ ((0.803215 0.0422868))
177
+ ((0.877856 -0.154465))))
178
+ ((R:SylStructure.parent.syl_out < 3.6)
179
+ ((R:SylStructure.parent.syl_out < 1.2)
180
+ ((0.567081 -0.264199))
181
+ ((0.598043 -0.541738)))
182
+ ((0.676843 -0.166623)))))
183
+ ((0.691678 -0.57173))))
184
+ ((R:SylStructure.parent.parent.gpos is cc)
185
+ ((1.15995 0.313289))
186
+ ((pp.ph_vfront is 1)
187
+ ((0.555993 0.0695819))
188
+ ((R:SylStructure.parent.asyl_in < 1.2)
189
+ ((R:SylStructure.parent.sub_phrases < 2.7)
190
+ ((0.721635 -0.367088))
191
+ ((0.71919 -0.194887)))
192
+ ((0.547052 -0.0637491)))))))
193
+ ((ph_ctype is s)
194
+ ((R:SylStructure.parent.syl_break is 0)
195
+ ((R:SylStructure.parent.R:Syllable.p.syl_break is 1)
196
+ ((0.650007 -0.333421))
197
+ ((0.846301 -0.165383)))
198
+ ((0.527756 -0.516332)))
199
+ ((R:SylStructure.parent.syl_break is 0)
200
+ ((p.ph_ctype is s)
201
+ ((0.504414 -0.779112))
202
+ ((0.812498 -0.337611)))
203
+ ((pos_in_syl < 1.4)
204
+ ((0.513041 -0.745807))
205
+ ((p.ph_ctype is s)
206
+ ((0.350582 -1.04907))
207
+ ((0.362 -0.914974))))))))
208
+ ((R:SylStructure.parent.syl_break is 0)
209
+ ((ph_ctype is n)
210
+ ((R:SylStructure.parent.position_type is initial)
211
+ ((pos_in_syl < 1.2)
212
+ ((0.580485 0.172658))
213
+ ((0.630973 -0.101423)))
214
+ ((0.577937 -0.360092)))
215
+ ((R:SylStructure.parent.syl_out < 2.9)
216
+ ((R:SylStructure.parent.syl_out < 1.1)
217
+ ((R:SylStructure.parent.position_type is initial)
218
+ ((0.896092 0.764189))
219
+ ((R:SylStructure.parent.sub_phrases < 3.6)
220
+ ((ph_ctype is s)
221
+ ((0.877362 0.555132))
222
+ ((0.604511 0.369882)))
223
+ ((0.799982 0.666966))))
224
+ ((seg_onsetcoda is coda)
225
+ ((p.ph_vlng is a)
226
+ ((R:SylStructure.parent.last_accent < 0.4)
227
+ ((0.800736 0.240634))
228
+ ((0.720606 0.486176)))
229
+ ((1.18173 0.573811)))
230
+ ((0.607147 0.194468))))
231
+ ((ph_ctype is r)
232
+ ((0.88377 0.499383))
233
+ ((R:SylStructure.parent.last_accent < 0.5)
234
+ ((R:SylStructure.parent.position_type is initial)
235
+ ((R:SylStructure.parent.parent.word_numsyls < 2.4)
236
+ ((0.62798 0.0737318))
237
+ ((0.787334 0.331014)))
238
+ ((ph_ctype is s)
239
+ ((0.808368 0.0929299))
240
+ ((0.527948 -0.0443271))))
241
+ ((seg_coda_fric is 0)
242
+ ((p.ph_vlng is a)
243
+ ((0.679745 0.517681))
244
+ ((R:SylStructure.parent.sub_phrases < 1.1)
245
+ ((0.759979 0.128316))
246
+ ((0.775233 0.361383))))
247
+ ((R:SylStructure.parent.last_accent < 1.3)
248
+ ((0.696255 0.054136))
249
+ ((0.632425 0.246742))))))))
250
+ ((pos_in_syl < 0.3)
251
+ ((R:SylStructure.parent.R:Syllable.p.syl_break is 2)
252
+ ((0.847602 0.621547))
253
+ ((ph_ctype is s)
254
+ ((0.880645 0.501679))
255
+ ((R:SylStructure.parent.sub_phrases < 3.3)
256
+ ((R:SylStructure.parent.sub_phrases < 0.3)
257
+ ((0.901014 -0.042049))
258
+ ((0.657493 0.183226)))
259
+ ((0.680126 0.284799)))))
260
+ ((ph_ctype is s)
261
+ ((p.ph_vlng is s)
262
+ ((0.670033 -0.820934))
263
+ ((0.863306 -0.348735)))
264
+ ((ph_ctype is n)
265
+ ((R:SylStructure.parent.asyl_in < 1.2)
266
+ ((0.656966 -0.40092))
267
+ ((0.530966 -0.639366)))
268
+ ((seg_coda_fric is 0)
269
+ ((1.04153 0.364857))
270
+ ((pos_in_syl < 1.2)
271
+ ((R:SylStructure.parent.syl_out < 3.4)
272
+ ((0.81503 -0.00768613))
273
+ ((0.602665 -0.197753)))
274
+ ((0.601844 -0.394632)))))))))
275
+ ((n.ph_ctype is f)
276
+ ((pos_in_syl < 1.5)
277
+ ((R:SylStructure.parent.R:Syllable.p.syl_break is 2)
278
+ ((pos_in_syl < 0.1)
279
+ ((1.63863 0.938841))
280
+ ((R:SylStructure.parent.position_type is initial)
281
+ ((0.897722 -0.0796637))
282
+ ((nn.ph_vheight is 0)
283
+ ((0.781081 0.480026))
284
+ ((0.779711 0.127175)))))
285
+ ((ph_ctype is r)
286
+ ((p.ph_ctype is s)
287
+ ((0.581329 -0.708767))
288
+ ((0.564366 -0.236212)))
289
+ ((ph_vlng is a)
290
+ ((p.ph_ctype is r)
291
+ ((0.70992 -0.273389))
292
+ ((R:SylStructure.parent.parent.gpos is in)
293
+ ((0.764696 0.0581338))
294
+ ((nn.ph_vheight is 0)
295
+ ((0.977737 0.721904))
296
+ ((R:SylStructure.parent.sub_phrases < 2.2)
297
+ ((pp.ph_vfront is 0)
298
+ ((0.586708 0.0161206))
299
+ ((0.619949 0.227372)))
300
+ ((0.707285 0.445569))))))
301
+ ((ph_ctype is n)
302
+ ((R:SylStructure.parent.syl_break is 1)
303
+ ((nn.ph_vfront is 2)
304
+ ((0.430295 -0.120097))
305
+ ((0.741371 0.219042)))
306
+ ((0.587492 0.321245)))
307
+ ((p.ph_ctype is n)
308
+ ((0.871586 0.134075))
309
+ ((p.ph_ctype is r)
310
+ ((0.490751 -0.466418))
311
+ ((R:SylStructure.parent.syl_codasize < 1.3)
312
+ ((R:SylStructure.parent.sub_phrases < 2.2)
313
+ ((p.ph_ctype is s)
314
+ ((0.407452 -0.425925))
315
+ ((0.644771 -0.542809)))
316
+ ((0.688772 -0.201899)))
317
+ ((ph_vheight is 1)
318
+ ((nn.ph_vheight is 0)
319
+ ((0.692018 0.209018))
320
+ ((0.751345 -0.178136)))
321
+ ((R:SylStructure.parent.R:Syllable.n.syl_onsetsize < 0.3)
322
+ ((R:SylStructure.parent.asyl_in < 1.5)
323
+ ((0.599633 -0.235593))
324
+ ((0.60042 0.126118)))
325
+ ((p.ph_vlng is a)
326
+ ((0.7148 -0.174812))
327
+ ((R:SylStructure.parent.parent.gpos is content)
328
+ ((0.761296 -0.231509))
329
+ ((0.813081 -0.536405)))))))))))))
330
+ ((ph_ctype is n)
331
+ ((0.898844 0.163343))
332
+ ((p.ph_vlng is s)
333
+ ((seg_coda_fric is 0)
334
+ ((0.752921 -0.45528))
335
+ ((0.890079 -0.0998025)))
336
+ ((ph_ctype is f)
337
+ ((0.729376 -0.930547))
338
+ ((ph_ctype is s)
339
+ ((R:SylStructure.parent.R:Syllable.p.syl_break is 0)
340
+ ((0.745052 -0.634119))
341
+ ((0.521502 -0.760176)))
342
+ ((R:SylStructure.parent.syl_break is 1)
343
+ ((0.766575 -0.121355))
344
+ ((0.795616 -0.557509))))))))
345
+ ((p.ph_vlng is 0)
346
+ ((p.ph_ctype is r)
347
+ ((ph_vlng is 0)
348
+ ((0.733659 -0.402734))
349
+ ((R:SylStructure.parent.sub_phrases < 1.5)
350
+ ((ph_vlng is s)
351
+ ((0.326176 -0.988478))
352
+ ((n.ph_ctype is s)
353
+ ((0.276471 -0.802536))
354
+ ((0.438283 -0.900628))))
355
+ ((nn.ph_vheight is 0)
356
+ ((ph_vheight is 2)
357
+ ((0.521 -0.768992))
358
+ ((0.615436 -0.574918)))
359
+ ((ph_vheight is 1)
360
+ ((0.387376 -0.756359))
361
+ ((pos_in_syl < 0.3)
362
+ ((0.417235 -0.808937))
363
+ ((0.384043 -0.93315)))))))
364
+ ((ph_vlng is a)
365
+ ((ph_ctype is 0)
366
+ ((n.ph_ctype is s)
367
+ ((p.ph_ctype is f)
368
+ ((R:SylStructure.parent.R:Syllable.n.syl_onsetsize < 0.2)
369
+ ((0.415908 -0.428493))
370
+ ((pos_in_syl < 0.1)
371
+ ((0.790441 0.0211071))
372
+ ((0.452465 -0.254485))))
373
+ ((p.ph_ctype is s)
374
+ ((R:SylStructure.parent.R:Syllable.n.syl_onsetsize < 0.2)
375
+ ((0.582447 -0.389966))
376
+ ((0.757648 0.185781)))
377
+ ((R:SylStructure.parent.sub_phrases < 1.4)
378
+ ((0.628965 0.422551))
379
+ ((0.713613 0.145576)))))
380
+ ((seg_onset_stop is 0)
381
+ ((R:SylStructure.parent.R:Syllable.p.syl_break is 0)
382
+ ((pp.ph_vfront is 1)
383
+ ((0.412363 -0.62319))
384
+ ((R:SylStructure.parent.syl_out < 3.6)
385
+ ((0.729259 -0.317324))
386
+ ((0.441633 -0.591051))))
387
+ ((R:SylStructure.parent.syl_break is 1)
388
+ ((R:SylStructure.parent.sub_phrases < 2.7)
389
+ ((0.457728 -0.405607))
390
+ ((0.532411 -0.313148)))
391
+ ((R:SylStructure.parent.last_accent < 0.3)
392
+ ((1.14175 0.159416))
393
+ ((0.616396 -0.254651)))))
394
+ ((R:SylStructure.parent.position_type is initial)
395
+ ((0.264181 -0.799896))
396
+ ((0.439801 -0.551309)))))
397
+ ((R:SylStructure.parent.position_type is final)
398
+ ((0.552027 -0.707084))
399
+ ((0.585661 -0.901874))))
400
+ ((ph_ctype is s)
401
+ ((pos_in_syl < 1.2)
402
+ ((R:SylStructure.parent.R:Syllable.n.syl_onsetsize < 0.2)
403
+ ((pp.ph_vfront is 1)
404
+ ((0.607449 0.196466))
405
+ ((0.599662 0.00382414)))
406
+ ((0.64109 -0.12859)))
407
+ ((pp.ph_vfront is 1)
408
+ ((0.720484 -0.219339))
409
+ ((0.688707 -0.516734))))
410
+ ((ph_vlng is s)
411
+ ((n.ph_ctype is s)
412
+ ((R:SylStructure.parent.parent.gpos is content)
413
+ ((R:SylStructure.parent.position_type is single)
414
+ ((0.659206 0.159445))
415
+ ((R:SylStructure.parent.parent.word_numsyls < 3.5)
416
+ ((R:SylStructure.parent.sub_phrases < 2)
417
+ ((0.447186 -0.419103))
418
+ ((0.631822 -0.0928561)))
419
+ ((0.451623 -0.576116))))
420
+ ((ph_vheight is 3)
421
+ ((0.578626 -0.64583))
422
+ ((0.56636 -0.4665))))
423
+ ((R:SylStructure.parent.parent.gpos is in)
424
+ ((0.771516 -0.217292))
425
+ ((R:SylStructure.parent.R:Syllable.p.syl_break is 2)
426
+ ((0.688571 -0.304382))
427
+ ((R:SylStructure.parent.parent.gpos is content)
428
+ ((R:SylStructure.parent.R:Syllable.p.syl_break is 1)
429
+ ((n.ph_ctype is n)
430
+ ((0.556085 -0.572203))
431
+ ((0.820173 -0.240338)))
432
+ ((R:SylStructure.parent.parent.word_numsyls < 2.2)
433
+ ((0.595398 -0.588171))
434
+ ((0.524737 -0.95797))))
435
+ ((R:SylStructure.parent.sub_phrases < 3.9)
436
+ ((0.371492 -0.959427))
437
+ ((0.440479 -0.845747)))))))
438
+ ((R:SylStructure.parent.R:Syllable.p.syl_break is 0)
439
+ ((p.ph_ctype is f)
440
+ ((0.524088 -0.482247))
441
+ ((nn.ph_vheight is 1)
442
+ ((0.587666 -0.632362))
443
+ ((ph_vlng is l)
444
+ ((R:SylStructure.parent.position_type is final)
445
+ ((0.513286 -0.713117))
446
+ ((0.604613 -0.924308)))
447
+ ((R:SylStructure.parent.syl_codasize < 2.2)
448
+ ((0.577997 -0.891342))
449
+ ((0.659804 -1.15252))))))
450
+ ((pp.ph_vlng is s)
451
+ ((ph_ctype is f)
452
+ ((0.813383 -0.599624))
453
+ ((0.984027 -0.0771909)))
454
+ ((p.ph_ctype is f)
455
+ ((R:SylStructure.parent.parent.gpos is in)
456
+ ((R:SylStructure.parent.R:Syllable.p.syl_break is 1)
457
+ ((0.313572 -1.03242))
458
+ ((0.525854 -0.542799)))
459
+ ((R:SylStructure.parent.syl_out < 2.8)
460
+ ((0.613007 -0.423979))
461
+ ((0.570258 -0.766379))))
462
+ ((R:SylStructure.parent.syl_break is 1)
463
+ ((R:SylStructure.parent.parent.gpos is to)
464
+ ((0.364585 -0.792895))
465
+ ((ph_vlng is l)
466
+ ((0.69143 -0.276816))
467
+ ((0.65673 -0.523721))))
468
+ ((R:SylStructure.parent.syl_out < 3.6)
469
+ ((R:SylStructure.parent.position_type is initial)
470
+ ((0.682096 -0.488102))
471
+ ((0.406364 -0.731758)))
472
+ ((0.584694 -0.822229)))))))))))
473
+ ((n.ph_ctype is r)
474
+ ((R:SylStructure.parent.position_type is initial)
475
+ ((p.ph_vlng is a)
476
+ ((0.797058 1.02334))
477
+ ((ph_ctype is s)
478
+ ((1.0548 0.536277))
479
+ ((0.817253 0.138201))))
480
+ ((R:SylStructure.parent.sub_phrases < 1.1)
481
+ ((R:SylStructure.parent.syl_out < 3.3)
482
+ ((0.884574 -0.23471))
483
+ ((0.772063 -0.525292)))
484
+ ((nn.ph_vfront is 1)
485
+ ((1.25254 0.417485))
486
+ ((0.955557 -0.0781996)))))
487
+ ((pp.ph_vfront is 0)
488
+ ((ph_ctype is f)
489
+ ((n.ph_ctype is s)
490
+ ((R:SylStructure.parent.parent.gpos is content)
491
+ ((R:SylStructure.parent.R:Syllable.p.syl_break is 0)
492
+ ((0.583506 -0.56941))
493
+ ((0.525949 -0.289362)))
494
+ ((0.749316 -0.0921038)))
495
+ ((p.ph_vlng is s)
496
+ ((0.734234 0.139463))
497
+ ((0.680119 -0.0708717))))
498
+ ((ph_vlng is s)
499
+ ((ph_vheight is 1)
500
+ ((0.908712 -0.618971))
501
+ ((0.55344 -0.840495)))
502
+ ((R:SylStructure.parent.R:Syllable.n.syl_onsetsize < 1.2)
503
+ ((pos_in_syl < 1.2)
504
+ ((R:SylStructure.parent.R:Syllable.p.syl_break is 2)
505
+ ((0.838715 0.00913392))
506
+ ((R:SylStructure.parent.R:Syllable.p.syl_break is 1)
507
+ ((ph_vheight is 2)
508
+ ((0.555513 -0.512523))
509
+ ((R:SylStructure.parent.position_type is initial)
510
+ ((0.758711 0.121704))
511
+ ((0.737555 -0.25637))))
512
+ ((R:SylStructure.parent.syl_out < 3.1)
513
+ ((n.ph_ctype is s)
514
+ ((0.611756 -0.474522))
515
+ ((1.05437 -0.247206)))
516
+ ((R:SylStructure.parent.syl_codasize < 2.2)
517
+ ((R:SylStructure.parent.position_type is final)
518
+ ((0.567761 -0.597866))
519
+ ((0.785599 -0.407765)))
520
+ ((0.575598 -0.741256))))))
521
+ ((ph_ctype is s)
522
+ ((n.ph_ctype is s)
523
+ ((0.661069 -1.08426))
524
+ ((0.783184 -0.39789)))
525
+ ((R:SylStructure.parent.R:Syllable.p.syl_break is 1)
526
+ ((R:SylStructure.parent.sub_phrases < 2.6)
527
+ ((0.511323 -0.666011))
528
+ ((0.691878 -0.499492)))
529
+ ((ph_ctype is r)
530
+ ((0.482131 -0.253186))
531
+ ((0.852955 -0.372832))))))
532
+ ((0.854447 -0.0936489)))))
533
+ ((R:SylStructure.parent.position_type is final)
534
+ ((0.685939 -0.249982))
535
+ ((R:SylStructure.parent.syl_out < 3.2)
536
+ ((0.989843 0.18086))
537
+ ((0.686805 -0.0402908)))))))))
538
+ ((R:SylStructure.parent.syl_out < 2.4)
539
+ ((R:SylStructure.parent.syl_out < 0.2)
540
+ ((seg_onsetcoda is coda)
541
+ ((ph_ctype is s)
542
+ ((R:SylStructure.parent.syl_break is 4)
543
+ ((pp.ph_vlng is 0)
544
+ ((0.959737 1.63203))
545
+ ((1.20714 0.994933)))
546
+ ((n.ph_ctype is 0)
547
+ ((R:SylStructure.parent.syl_break is 2)
548
+ ((0.864809 0.214457))
549
+ ((0.874278 0.730381)))
550
+ ((pp.ph_vfront is 0)
551
+ ((seg_coda_fric is 0)
552
+ ((1.20844 -0.336221))
553
+ ((1.01357 0.468302)))
554
+ ((0.658106 -0.799121)))))
555
+ ((n.ph_ctype is f)
556
+ ((ph_ctype is f)
557
+ ((1.26332 0.0300613))
558
+ ((ph_vlng is d)
559
+ ((1.02719 1.1649))
560
+ ((ph_ctype is 0)
561
+ ((R:SylStructure.parent.asyl_in < 1.2)
562
+ ((1.14048 2.2668))
563
+ ((ph_vheight is 1)
564
+ ((1.15528 1.50375))
565
+ ((1.42406 2.07927))))
566
+ ((R:SylStructure.parent.sub_phrases < 1.1)
567
+ ((0.955892 1.10243))
568
+ ((R:SylStructure.parent.syl_break is 2)
569
+ ((1.32682 1.8432))
570
+ ((1.27582 1.59853)))))))
571
+ ((n.ph_ctype is 0)
572
+ ((ph_ctype is n)
573
+ ((R:SylStructure.parent.syl_break is 2)
574
+ ((1.45399 1.12927))
575
+ ((1.05543 0.442376)))
576
+ ((R:SylStructure.parent.syl_break is 4)
577
+ ((R:SylStructure.parent.position_type is final)
578
+ ((ph_ctype is f)
579
+ ((1.46434 1.76508))
580
+ ((0.978055 0.7486)))
581
+ ((1.2395 2.30826)))
582
+ ((ph_ctype is 0)
583
+ ((0.935325 1.69917))
584
+ ((nn.ph_vfront is 1)
585
+ ((1.20456 1.31128))
586
+ ((R:SylStructure.parent.R:Syllable.n.syl_onsetsize < 0.2)
587
+ ((nn.ph_vheight is 0)
588
+ ((1.16907 0.212421))
589
+ ((0.952091 0.653094)))
590
+ ((p.ph_ctype is 0)
591
+ ((1.05502 1.25802))
592
+ ((0.818731 0.777568))))))))
593
+ ((ph_ctype is f)
594
+ ((p.ph_ctype is 0)
595
+ ((1.03918 0.163941))
596
+ ((0.737545 -0.167063)))
597
+ ((R:SylStructure.parent.position_type is final)
598
+ ((n.ph_ctype is n)
599
+ ((R:SylStructure.parent.last_accent < 0.5)
600
+ ((R:SylStructure.parent.sub_phrases < 2.8)
601
+ ((0.826207 -0.000859005))
602
+ ((0.871119 0.273433)))
603
+ ((R:SylStructure.parent.parent.word_numsyls < 2.4)
604
+ ((1.17405 1.05694))
605
+ ((0.858394 0.244916))))
606
+ ((R:SylStructure.parent.syl_codasize < 2.2)
607
+ ((p.ph_ctype is 0)
608
+ ((1.14092 1.21187))
609
+ ((R:SylStructure.parent.syl_break is 2)
610
+ ((1.02653 0.59865))
611
+ ((0.94248 1.1634))))
612
+ ((seg_coda_fric is 0)
613
+ ((1.07441 0.292935))
614
+ ((1.15736 0.92574)))))
615
+ ((ph_vlng is s)
616
+ ((R:SylStructure.parent.syl_break is 2)
617
+ ((1.34638 1.23484))
618
+ ((0.951514 2.02008)))
619
+ ((ph_ctype is 0)
620
+ ((p.ph_ctype is r)
621
+ ((0.806106 0.697089))
622
+ ((R:SylStructure.parent.syl_break is 2)
623
+ ((1.10891 0.992197))
624
+ ((1.04657 1.51093))))
625
+ ((1.18165 0.520952)))))))))
626
+ ((p.ph_vlng is 0)
627
+ ((pos_in_syl < 0.7)
628
+ ((R:SylStructure.parent.position_type is final)
629
+ ((ph_ctype is r)
630
+ ((0.966357 0.185827))
631
+ ((ph_ctype is s)
632
+ ((0.647163 0.0332298))
633
+ ((0.692972 -0.534917))))
634
+ ((ph_ctype is s)
635
+ ((0.881521 0.575107))
636
+ ((p.ph_ctype is f)
637
+ ((0.8223 -0.111275))
638
+ ((R:SylStructure.parent.last_accent < 0.3)
639
+ ((0.969188 0.09447))
640
+ ((0.894438 0.381947))))))
641
+ ((p.ph_ctype is f)
642
+ ((0.479748 -0.490108))
643
+ ((0.813125 -0.201268))))
644
+ ((ph_ctype is s)
645
+ ((0.908566 1.20397))
646
+ ((R:SylStructure.parent.last_accent < 1.2)
647
+ ((0.88078 0.636568))
648
+ ((0.978087 1.07763))))))
649
+ ((pos_in_syl < 1.3)
650
+ ((R:SylStructure.parent.syl_break is 0)
651
+ ((pos_in_syl < 0.1)
652
+ ((R:SylStructure.parent.position_type is initial)
653
+ ((p.ph_ctype is n)
654
+ ((0.801651 -0.0163359))
655
+ ((ph_ctype is s)
656
+ ((n.ph_ctype is r)
657
+ ((0.893307 1.07253))
658
+ ((p.ph_vlng is 0)
659
+ ((0.92651 0.525806))
660
+ ((0.652444 0.952792))))
661
+ ((p.ph_vlng is 0)
662
+ ((seg_onsetcoda is coda)
663
+ ((0.820151 0.469117))
664
+ ((p.ph_ctype is f)
665
+ ((0.747972 -0.0716448))
666
+ ((ph_ctype is f)
667
+ ((0.770882 0.457137))
668
+ ((0.840905 0.102492)))))
669
+ ((R:SylStructure.parent.syl_out < 1.1)
670
+ ((0.667824 0.697337))
671
+ ((0.737967 0.375114))))))
672
+ ((ph_vheight is 1)
673
+ ((0.624353 0.410671))
674
+ ((R:SylStructure.parent.asyl_in < 0.8)
675
+ ((0.647905 -0.331055))
676
+ ((p.ph_ctype is s)
677
+ ((0.629039 -0.240616))
678
+ ((0.749277 -0.0191273))))))
679
+ ((ph_vheight is 3)
680
+ ((p.ph_ctype is s)
681
+ ((0.626922 0.556537))
682
+ ((0.789357 0.153892)))
683
+ ((seg_onsetcoda is coda)
684
+ ((n.ph_ctype is 0)
685
+ ((R:SylStructure.parent.parent.word_numsyls < 3.4)
686
+ ((0.744714 0.123242))
687
+ ((0.742039 0.295753)))
688
+ ((seg_coda_fric is 0)
689
+ ((R:SylStructure.parent.parent.word_numsyls < 2.4)
690
+ ((ph_vheight is 1)
691
+ ((0.549715 -0.341018))
692
+ ((0.573641 -0.00893114)))
693
+ ((nn.ph_vfront is 2)
694
+ ((0.67099 -0.744625))
695
+ ((0.664438 -0.302803))))
696
+ ((p.ph_vlng is 0)
697
+ ((0.630028 0.113815))
698
+ ((0.632794 -0.128733)))))
699
+ ((ph_ctype is r)
700
+ ((0.367169 -0.854509))
701
+ ((0.94334 -0.216179))))))
702
+ ((n.ph_ctype is f)
703
+ ((ph_vlng is 0)
704
+ ((1.3089 0.46195))
705
+ ((R:SylStructure.parent.syl_codasize < 1.3)
706
+ ((1.07673 0.657169))
707
+ ((pp.ph_vlng is 0)
708
+ ((0.972319 1.08222))
709
+ ((1.00038 1.46257)))))
710
+ ((p.ph_vlng is l)
711
+ ((1.03617 0.785204))
712
+ ((p.ph_vlng is a)
713
+ ((R:SylStructure.parent.position_type is final)
714
+ ((1.00681 0.321168))
715
+ ((0.928115 0.950834)))
716
+ ((ph_vlng is 0)
717
+ ((pos_in_syl < 0.1)
718
+ ((R:SylStructure.parent.position_type is final)
719
+ ((0.863682 -0.167374))
720
+ ((nn.ph_vheight is 0)
721
+ ((p.ph_ctype is f)
722
+ ((0.773591 -0.00374425))
723
+ ((R:SylStructure.parent.syl_out < 1.1)
724
+ ((0.951802 0.228448))
725
+ ((1.02282 0.504252))))
726
+ ((1.09721 0.736476))))
727
+ ((R:SylStructure.parent.position_type is final)
728
+ ((1.04302 0.0590974))
729
+ ((0.589208 -0.431535))))
730
+ ((n.ph_ctype is 0)
731
+ ((1.27879 1.00642))
732
+ ((ph_vlng is s)
733
+ ((R:SylStructure.parent.asyl_in < 1.4)
734
+ ((0.935787 0.481652))
735
+ ((0.9887 0.749861)))
736
+ ((R:SylStructure.parent.syl_out < 1.1)
737
+ ((R:SylStructure.parent.position_type is final)
738
+ ((0.921307 0.0696307))
739
+ ((0.83675 0.552212)))
740
+ ((0.810076 -0.0479225))))))))))
741
+ ((ph_ctype is s)
742
+ ((n.ph_ctype is s)
743
+ ((0.706959 -1.0609))
744
+ ((p.ph_ctype is n)
745
+ ((0.850614 -0.59933))
746
+ ((n.ph_ctype is r)
747
+ ((0.665947 0.00698725))
748
+ ((n.ph_ctype is 0)
749
+ ((R:SylStructure.parent.position_type is initial)
750
+ ((0.762889 -0.0649044))
751
+ ((0.723956 -0.248899)))
752
+ ((R:SylStructure.parent.sub_phrases < 1.4)
753
+ ((0.632957 -0.601987))
754
+ ((0.889114 -0.302401)))))))
755
+ ((ph_ctype is f)
756
+ ((R:SylStructure.parent.syl_codasize < 2.2)
757
+ ((R:SylStructure.parent.R:Syllable.n.syl_onsetsize < 0.2)
758
+ ((R:SylStructure.parent.syl_out < 1.1)
759
+ ((0.865267 0.164636))
760
+ ((0.581827 -0.0989051)))
761
+ ((nn.ph_vfront is 2)
762
+ ((0.684459 -0.316836))
763
+ ((0.778854 -0.0961191))))
764
+ ((R:SylStructure.parent.syl_out < 1.1)
765
+ ((p.ph_ctype is s)
766
+ ((0.837964 -0.429437))
767
+ ((0.875304 -0.0652743)))
768
+ ((0.611071 -0.635089))))
769
+ ((p.ph_ctype is r)
770
+ ((R:SylStructure.parent.syl_out < 1.1)
771
+ ((0.762012 0.0139361))
772
+ ((0.567983 -0.454845)))
773
+ ((R:SylStructure.parent.syl_codasize < 2.2)
774
+ ((ph_ctype is l)
775
+ ((1.18845 0.809091))
776
+ ((R:SylStructure.parent.position_type is initial)
777
+ ((ph_ctype is n)
778
+ ((0.773548 -0.277092))
779
+ ((1.01586 0.281001)))
780
+ ((p.ph_ctype is 0)
781
+ ((1.06831 0.699145))
782
+ ((0.924189 0.241873)))))
783
+ ((R:SylStructure.parent.syl_break is 0)
784
+ ((ph_ctype is n)
785
+ ((0.592321 -0.470784))
786
+ ((0.778688 -0.072112)))
787
+ ((n.ph_ctype is s)
788
+ ((1.08848 0.0733489))
789
+ ((1.25674 0.608371))))))))))
790
+ ((pos_in_syl < 0.7)
791
+ ((p.ph_vlng is 0)
792
+ ((R:SylStructure.parent.position_type is mid)
793
+ ((ph_ctype is 0)
794
+ ((ph_vheight is 2)
795
+ ((0.456225 -0.293282))
796
+ ((0.561529 -0.0816115)))
797
+ ((0.6537 -0.504024)))
798
+ ((ph_ctype is s)
799
+ ((R:SylStructure.parent.R:Syllable.p.syl_break is 2)
800
+ ((1.31586 0.98395))
801
+ ((R:SylStructure.parent.position_type is single)
802
+ ((0.816869 0.634789))
803
+ ((R:SylStructure.parent.syl_out < 4.4)
804
+ ((1.05578 0.479029))
805
+ ((R:SylStructure.parent.asyl_in < 0.4)
806
+ ((1.11813 0.143214))
807
+ ((0.87178 0.406834))))))
808
+ ((n.ph_ctype is n)
809
+ ((R:SylStructure.parent.last_accent < 0.6)
810
+ ((0.838154 -0.415599))
811
+ ((0.924024 0.110288)))
812
+ ((seg_onsetcoda is coda)
813
+ ((nn.ph_vfront is 2)
814
+ ((0.670096 0.0314187))
815
+ ((n.ph_ctype is f)
816
+ ((1.00363 0.693893))
817
+ ((R:SylStructure.parent.syl_out < 6)
818
+ ((0.772363 0.215675))
819
+ ((0.920313 0.574068)))))
820
+ ((R:SylStructure.parent.position_type is final)
821
+ ((0.673837 -0.458142))
822
+ ((R:SylStructure.parent.sub_phrases < 2.8)
823
+ ((R:SylStructure.parent.R:Syllable.p.syl_break is 2)
824
+ ((0.894817 0.304628))
825
+ ((ph_ctype is n)
826
+ ((0.787302 -0.23094))
827
+ ((R:SylStructure.parent.asyl_in < 1.2)
828
+ ((ph_ctype is f)
829
+ ((R:SylStructure.parent.last_accent < 0.5)
830
+ ((1.12278 0.326954))
831
+ ((0.802236 -0.100616)))
832
+ ((0.791255 -0.0919132)))
833
+ ((0.95233 0.219053)))))
834
+ ((R:SylStructure.parent.position_type is initial)
835
+ ((ph_ctype is f)
836
+ ((1.0616 0.216118))
837
+ ((0.703216 -0.00834086)))
838
+ ((ph_ctype is f)
839
+ ((1.22277 0.761763))
840
+ ((0.904811 0.332721))))))))))
841
+ ((ph_vheight is 0)
842
+ ((p.ph_vlng is s)
843
+ ((0.873379 0.217178))
844
+ ((n.ph_ctype is r)
845
+ ((0.723915 1.29451))
846
+ ((n.ph_ctype is 0)
847
+ ((R:SylStructure.parent.R:Syllable.p.syl_break is 1)
848
+ ((R:SylStructure.parent.sub_phrases < 4)
849
+ ((seg_coda_fric is 0)
850
+ ((p.ph_vlng is l)
851
+ ((0.849154 0.945261))
852
+ ((0.633261 0.687498)))
853
+ ((0.728546 0.403076)))
854
+ ((0.850962 1.00255)))
855
+ ((0.957999 1.09113)))
856
+ ((0.85771 0.209045)))))
857
+ ((ph_vheight is 2)
858
+ ((0.803401 -0.0544067))
859
+ ((0.681353 0.256045)))))
860
+ ((n.ph_ctype is f)
861
+ ((ph_ctype is s)
862
+ ((p.ph_vlng is 0)
863
+ ((0.479307 -0.9673))
864
+ ((0.700477 -0.351397)))
865
+ ((ph_ctype is f)
866
+ ((0.73467 -0.6233))
867
+ ((R:SylStructure.parent.syl_break is 0)
868
+ ((p.ph_ctype is s)
869
+ ((0.56282 0.266234))
870
+ ((p.ph_ctype is r)
871
+ ((0.446203 -0.302281))
872
+ ((R:SylStructure.parent.sub_phrases < 2.7)
873
+ ((ph_ctype is 0)
874
+ ((0.572016 -0.0102436))
875
+ ((0.497358 -0.274514)))
876
+ ((0.545477 0.0482177)))))
877
+ ((ph_vlng is s)
878
+ ((0.805269 0.888495))
879
+ ((ph_ctype is n)
880
+ ((0.869854 0.653018))
881
+ ((R:SylStructure.parent.sub_phrases < 2.2)
882
+ ((0.735031 0.0612886))
883
+ ((0.771859 0.346637))))))))
884
+ ((R:SylStructure.parent.syl_codasize < 1.4)
885
+ ((R:SylStructure.parent.R:Syllable.n.syl_onsetsize < 0.3)
886
+ ((R:SylStructure.parent.position_type is initial)
887
+ ((0.743458 0.0411808))
888
+ ((1.13068 0.613305)))
889
+ ((pos_in_syl < 1.2)
890
+ ((R:SylStructure.parent.R:Syllable.p.syl_break is 1)
891
+ ((1.11481 0.175467))
892
+ ((0.937893 -0.276407)))
893
+ ((0.74264 -0.550878))))
894
+ ((pos_in_syl < 3.4)
895
+ ((seg_onsetcoda is coda)
896
+ ((ph_ctype is r)
897
+ ((n.ph_ctype is s)
898
+ ((0.714319 -0.240328))
899
+ ((p.ph_ctype is 0)
900
+ ((0.976987 0.330352))
901
+ ((1.1781 -0.0816682))))
902
+ ((ph_ctype is l)
903
+ ((n.ph_ctype is 0)
904
+ ((1.39137 0.383533))
905
+ ((0.725585 -0.324515)))
906
+ ((ph_vheight is 3)
907
+ ((ph_vlng is d)
908
+ ((0.802626 -0.62487))
909
+ ((n.ph_ctype is r)
910
+ ((0.661091 -0.513869))
911
+ ((R:SylStructure.parent.position_type is initial)
912
+ ((R:SylStructure.parent.parent.word_numsyls < 2.4)
913
+ ((0.482285 0.207874))
914
+ ((0.401601 -0.0204711)))
915
+ ((0.733755 0.397372)))))
916
+ ((n.ph_ctype is r)
917
+ ((p.ph_ctype is 0)
918
+ ((pos_in_syl < 1.2)
919
+ ((0.666325 0.271734))
920
+ ((nn.ph_vheight is 0)
921
+ ((0.642401 -0.261466))
922
+ ((0.783684 -0.00956571))))
923
+ ((R:SylStructure.parent.R:Syllable.n.syl_onsetsize < 0.2)
924
+ ((0.692225 -0.381895))
925
+ ((0.741921 -0.0898767))))
926
+ ((nn.ph_vfront is 2)
927
+ ((ph_ctype is s)
928
+ ((0.697527 -1.12626))
929
+ ((n.ph_ctype is s)
930
+ ((ph_vlng is 0)
931
+ ((R:SylStructure.parent.sub_phrases < 2.4)
932
+ ((0.498719 -0.906926))
933
+ ((0.635342 -0.625651)))
934
+ ((0.45886 -0.385089)))
935
+ ((0.848596 -0.359702))))
936
+ ((p.ph_vlng is a)
937
+ ((p.ph_ctype is 0)
938
+ ((0.947278 0.216904))
939
+ ((0.637933 -0.394349)))
940
+ ((p.ph_ctype is r)
941
+ ((R:SylStructure.parent.syl_break is 0)
942
+ ((0.529903 -0.860573))
943
+ ((0.581378 -0.510488)))
944
+ ((ph_vlng is 0)
945
+ ((R:SylStructure.parent.R:Syllable.n.syl_onsetsize < 0.2)
946
+ ((seg_onset_stop is 0)
947
+ ((R:SylStructure.parent.syl_break is 0)
948
+ ((p.ph_vlng is d)
949
+ ((0.768363 0.0108428))
950
+ ((ph_ctype is s)
951
+ ((0.835756 -0.035054))
952
+ ((ph_ctype is f)
953
+ ((p.ph_vlng is s)
954
+ ((0.602016 -0.179727))
955
+ ((0.640126 -0.297341)))
956
+ ((0.674628 -0.542602)))))
957
+ ((ph_ctype is s)
958
+ ((0.662261 -0.60496))
959
+ ((0.662088 -0.432058))))
960
+ ((R:SylStructure.parent.syl_out < 4.4)
961
+ ((0.582448 -0.389079))
962
+ ((ph_ctype is s)
963
+ ((0.60413 -0.73564))
964
+ ((0.567153 -0.605444)))))
965
+ ((R:SylStructure.parent.R:Syllable.p.syl_break is 2)
966
+ ((0.761115 -0.827377))
967
+ ((ph_ctype is n)
968
+ ((0.855183 -0.275338))
969
+ ((R:SylStructure.parent.syl_break is 0)
970
+ ((0.788288 -0.802801))
971
+ ((R:SylStructure.parent.syl_codasize < 2.2)
972
+ ((0.686134 -0.371234))
973
+ ((0.840184 -0.772883)))))))
974
+ ((pos_in_syl < 1.2)
975
+ ((R:SylStructure.parent.syl_break is 0)
976
+ ((n.ph_ctype is n)
977
+ ((0.423592 -0.655006))
978
+ ((R:SylStructure.parent.syl_out < 4.4)
979
+ ((0.595269 -0.303751))
980
+ ((0.478433 -0.456882))))
981
+ ((0.688133 -0.133182)))
982
+ ((seg_onset_stop is 0)
983
+ ((1.27464 0.114442))
984
+ ((0.406837 -0.167545))))))))))))
985
+ ((ph_ctype is r)
986
+ ((0.462874 -0.87695))
987
+ ((R:SylStructure.parent.R:Syllable.n.syl_onsetsize < 0.2)
988
+ ((0.645442 -0.640572))
989
+ ((0.673717 -0.321322)))))
990
+ ((0.61008 -0.925472))))))))
991
+ ;; RMSE 0.8085 Correlation is 0.5899 Mean (abs) Error 0.6024 (0.5393)
992
+
993
+
994
+ ))
995
+
996
+ (provide 'apml_kaldurtreeZ)
CosyVoice-ttsfrd/resource/festival/cart_aux.scm ADDED
@@ -0,0 +1,200 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2
+ ;;; ;;
3
+ ;;; Centre for Speech Technology Research ;;
4
+ ;;; University of Edinburgh, UK ;;
5
+ ;;; Copyright (c) 1996-2011 ;;
6
+ ;;; All Rights Reserved. ;;
7
+ ;;; ;;
8
+ ;;; Permission is hereby granted, free of charge, to use and distribute ;;
9
+ ;;; this software and its documentation without restriction, including ;;
10
+ ;;; without limitation the rights to use, copy, modify, merge, publish, ;;
11
+ ;;; distribute, sublicense, and/or sell copies of this work, and to ;;
12
+ ;;; permit persons to whom this work is furnished to do so, subject to ;;
13
+ ;;; the following conditions: ;;
14
+ ;;; 1. The code must retain the above copyright notice, this list of ;;
15
+ ;;; conditions and the following disclaimer. ;;
16
+ ;;; 2. Any modifications must be clearly marked as such. ;;
17
+ ;;; 3. Original authors' names are not deleted. ;;
18
+ ;;; 4. The authors' names are not used to endorse or promote products ;;
19
+ ;;; derived from this software without specific prior written ;;
20
+ ;;; permission. ;;
21
+ ;;; ;;
22
+ ;;; THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK ;;
23
+ ;;; DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ;;
24
+ ;;; ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT ;;
25
+ ;;; SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE ;;
26
+ ;;; FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES ;;
27
+ ;;; WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN ;;
28
+ ;;; AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ;;
29
+ ;;; ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF ;;
30
+ ;;; THIS SOFTWARE. ;;
31
+ ;;; ;;
32
+ ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
33
+ ;;;
34
+ ;;; Some functions for manipulating decision trees
35
+ ;;;
36
+ ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
37
+
38
(define (cart_prune_tree_thresh tree threshold default)
"(cart_prune_tree_thresh TREE THRESHOLD DEFAULT)
Return a copy of the classification tree TREE in which every leaf whose
best-class probability (see cart_class_probability) is less than
THRESHOLD has been replaced by a leaf returning DEFAULT.  This is useful
when different mistakes carry different penalties, so low-confidence
leaves should fall back to a chosen class."
  (if (cdr tree)
      ;; Question node: keep the question and prune both branches.
      (let ((yes_branch (car (cdr tree)))
            (no_branch (car (cdr (cdr tree)))))
        (list
         (car tree)
         (cart_prune_tree_thresh yes_branch threshold default)
         (cart_prune_tree_thresh no_branch threshold default)))
      ;; Leaf node: replace it only when its confidence is too low.
      (if (< (cart_class_probability (car tree)) threshold)
          (list (list (list threshold default) default))
          tree)))
55
+
56
(define (cart_class_probability class)
"(cart_class_probability CLASS)
Return the probability of the best class in the cart leaf node CLASS.
The best class is the last element of CLASS; its probability is the
second element of the entry keyed by that class.  If CLASS simply holds
a value with no probability entries, the probability is taken to be 1.0."
  (let ((entry (assoc (car (last class)) class)))
    (if entry
        (cadr entry)
        1.0)))
66
+
67
(define (cart_class_prune_merge tree)
"(cart_class_prune_merge TREE)
Collapse every pure sub-tree of TREE (one for which cart_tree_pure
returns a leaf) into that single leaf.  Sub-trees can become pure after
some other pruning technique has modified them."
  (or
   ;; The whole (sub-)tree is pure: it collapses to the returned leaf.
   (cart_tree_pure tree)
   (if (cdr tree)
       ;; Question node: keep the question, merge inside each branch.
       (list
        (car tree)
        (cart_class_prune_merge (car (cdr tree)))
        (cart_class_prune_merge (car (cdr (cdr tree)))))
       ;; Leaf: leave as is.
       tree)))
82
+
83
(define (cart_tree_pure tree)
"(cart_tree_pure TREE)
Return a leaf of TREE if all leaves below TREE compare equal on their
final element, and nil otherwise.  A leaf is trivially pure and is
returned unchanged."
  (if (cdr tree)
      ;; Question node: both branches must be pure and must agree.
      (let ((yes_leaf (cart_tree_pure (car (cdr tree))))
            (no_leaf (cart_tree_pure (car (cdr (cdr tree))))))
        (if (and yes_leaf
                 no_leaf
                 (equal? (car (last yes_leaf)) (car (last no_leaf))))
            yes_leaf
            nil))
      ;; Leaf node.
      tree))
100
+
101
(define (cart_simplify_tree tree map)
"(cart_simplify_tree TREE MAP)
Simplify a CART tree by reducing the probability density function in
each leaf to a simple single classification (no probabilities).  This
discards information but makes trees smaller, easier for humans to read
and faster for machines to load.  MAP is an assoc list keyed by class;
when the best class of a leaf has an entry in MAP, the leaf holds the
cdr of that entry instead.  This could be improved by merging 'is'
operators into 'in' operators in some situations."
  (if (cdr tree)
      ;; Question node: recurse into both branches.
      (list
       (car tree)
       (cart_simplify_tree (car (cdr tree)) map)
       (cart_simplify_tree (car (cdr (cdr tree))) map))
      ;; Leaf node: the best class is the final element of the pdf.
      (let* ((pdf (car tree))
             (best (car (last pdf)))
             (mapping (assoc best map)))
        (list
         (if mapping
             (cdr mapping)
             (last pdf))))))
121
+
122
(define (cart_simplify_tree2 tree)
"(cart_simplify_tree2 TREE)
Simplify a CART tree by stripping zero-probability classes from the
probability density function held in each leaf."
  (if (cdr tree)
      ;; Question node: recurse into both branches.
      (let ((yes_branch (car (cdr tree)))
            (no_branch (car (cdr (cdr tree)))))
        (list
         (car tree)
         (cart_simplify_tree2 yes_branch)
         (cart_simplify_tree2 no_branch)))
      ;; Leaf node: filter its pdf.
      (list (cart_remove_zero_probs (car tree)))))
135
+
136
(define (cart_remove_zero_probs pdf)
"(cart_remove_zero_probs PDF)
Return PDF without the entries whose probability equals 0.  The final
element of PDF (the best class, as found in a cart leaf node) is always
kept as is."
  (let ((head (car pdf))
        (others (cdr pdf)))
    (cond
     ;; Final element reached: it names the best class, never an entry.
     ((null others) pdf)
     ;; Zero-probability entry: drop it.
     ((equal? 0 (cadr head))
      (cart_remove_zero_probs others))
     (t
      (cons head (cart_remove_zero_probs others))))))
148
+
149
(define (cart_interpret_debug i tree)
"(cart_interpret_debug I TREE)
Interpret the CART tree TREE on item I, printing each question, the
feature value found on I and the branch taken.  Intended for comparing
output between different implementations (flite vs festival).  Supports
the 'is' and '<' operators; any other operator is reported and the walk
stops.  Returns the car of the leaf that is reached."
  (if (cdr tree)
      ;; Question node, of the form (FEATURE OPERATOR OPERAND).
      (let* ((question (car tree))
             (feat_name (car question))
             (operator (cadr question))
             (operand (car (cddr question)))
             (yes_branch (car (cdr tree)))
             (no_branch (car (cddr tree))))
        (format t "%s %s %s\n" feat_name (upcase operator) operand)
        ;; The feature value is bound locally rather than assigned to a
        ;; global variable, so interpreting a tree leaves no state behind.
        (let ((a (item.feat i feat_name)))
          (format t "%s\n" a)
          (cond
           ((string-equal "is" operator)
            (if (string-equal a operand)
                (begin
                  (format t "   YES\n")
                  (cart_interpret_debug i yes_branch))
                (begin
                  (format t "   NO\n")
                  (cart_interpret_debug i no_branch))))
           ((string-equal "<" operator)
            (if (< (parse-number a) (parse-number operand))
                (begin
                  (format t "   YES\n")
                  (cart_interpret_debug i yes_branch))
                (begin
                  (format t "   NO\n")
                  (cart_interpret_debug i no_branch))))
           (t
            (format t "unknown q type %l\n" question)))))
      ;; Leaf node.
      (car tree)))
181
+
182
+ ;;;
183
+ ;;; Prediction tree for OLS trees
184
+ ;;; applies OLS coefficients from appropriate leaf of tree
185
+ ;;;
186
(define (ols_tree_predict i tree)
"(ols_tree_predict I TREE)
Predict a value for item I from an OLS tree.  The leaf selected by wagon
holds a list of (NAME COEFFICIENT) pairs: the coefficient of the first
pair is used as the intercept, and each remaining pair contributes its
coefficient multiplied by the numeric value of feature NAME on I."
  (let* ((leaf (wagon i tree))
         (model (car leaf))
         (intercept (cadr (car model)))
         (weighted_feats
          (mapcar
           (lambda (fp)
             ;; feature value times its coefficient
             (* (parse-number (item.feat i (car fp))) (cadr fp)))
           (cdr model))))
    (apply + (cons intercept weighted_feats))))
198
+
199
+ (provide 'cart_aux)
200
+
CosyVoice-ttsfrd/resource/festival/clunits.scm ADDED
@@ -0,0 +1,287 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2
+ ;;; ;;
3
+ ;;; Carnegie Mellon University and ;;
4
+ ;;; Centre for Speech Technology Research ;;
5
+ ;;; University of Edinburgh, UK ;;
6
+ ;;; Copyright (c) 1998-2001 ;;
7
+ ;;; All Rights Reserved. ;;
8
+ ;;; ;;
9
+ ;;; Permission is hereby granted, free of charge, to use and distribute ;;
10
+ ;;; this software and its documentation without restriction, including ;;
11
+ ;;; without limitation the rights to use, copy, modify, merge, publish, ;;
12
+ ;;; distribute, sublicense, and/or sell copies of this work, and to ;;
13
+ ;;; permit persons to whom this work is furnished to do so, subject to ;;
14
+ ;;; the following conditions: ;;
15
+ ;;; 1. The code must retain the above copyright notice, this list of ;;
16
+ ;;; conditions and the following disclaimer. ;;
17
+ ;;; 2. Any modifications must be clearly marked as such. ;;
18
+ ;;; 3. Original authors' names are not deleted. ;;
19
+ ;;; 4. The authors' names are not used to endorse or promote products ;;
20
+ ;;; derived from this software without specific prior written ;;
21
+ ;;; permission. ;;
22
+ ;;; ;;
23
+ ;;; THE UNIVERSITY OF EDINBURGH, CARNEGIE MELLON UNIVERSITY AND THE ;;
24
+ ;;; CONTRIBUTORS TO THIS WORK DISCLAIM ALL WARRANTIES WITH REGARD TO ;;
25
+ ;;; THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY ;;
26
+ ;;; AND FITNESS, IN NO EVENT SHALL THE UNIVERSITY OF EDINBURGH, CARNEGIE ;;
27
+ ;;; MELLON UNIVERSITY NOR THE CONTRIBUTORS BE LIABLE FOR ANY SPECIAL, ;;
28
+ ;;; INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER ;;
29
+ ;;; RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION ;;
30
+ ;;; OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF ;;
31
+ ;;; OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. ;;
32
+ ;;; ;;
33
+ ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
34
+ ;;;
35
+ ;;; Cluster Unit selection support (Black and Taylor Eurospeech '97)
36
+ ;;;
37
+ ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
38
+ ;;;
39
+ ;;; Run-time support, selection and synthesis and some debugging functions
40
+ ;;;
41
+ ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
42
+
43
+ (require_module 'clunits)
44
+
45
+ (defvar cluster_synth_pre_hooks nil)
46
+ (defvar cluster_synth_post_hooks nil)
47
+
48
+ (defvar clunits_time time) ;; some old voices might use this
49
+
50
(defSynthType Cluster
  ;; User hooks run before any unit selection is done.
  (apply_hooks cluster_synth_pre_hooks utt)
  ;; Select the units, fetch them, then join them into a waveform.
  (Clunits_Select utt)
  (Clunits_Get_Units utt)
  (Clunits_Join_Units utt)
  ;; User hooks run on the finished utterance.
  (apply_hooks cluster_synth_post_hooks utt)
  utt)
58
+
59
(define (Clunits_Join_Units utt)
"(Clunits_Join_Units utt)
Join the preselected and gotten units into a waveform.  The method is
taken from the join_method entry of clunits_params (default simple) and
may be windowed, smoothedjoin, none, modified_lpc or smoothed_lpc; any
other value uses Clunits_Simple_Wave.  Returns utt."
  (let ((join_method (get_param 'join_method clunits_params 'simple)))
    ;; Choice of function to put them together
    (cond
     ((string-equal join_method 'windowed)
      (Clunits_Windowed_Wave utt)
      (clunits::fix_segs_durs utt))
     ((string-equal join_method 'smoothedjoin)
      (Clunits_SmoothedJoin_Wave utt)
      (clunits::fix_segs_durs utt))
     ((string-equal join_method 'none)
      t)
     ((string-equal join_method 'modified_lpc)
      (defvar UniSyn_module_hooks nil)
      (Param.def "unisyn.window_name" "hanning")
      (Param.def "unisyn.window_factor" 1.0)
      (Parameter.def 'us_sigpr 'lpc)
      ;; Copy each unit's end time onto the matching segment.
      (mapcar
       (lambda (u s)
         (item.set_feat s "source_end" (item.feat u "end")))
       (utt.relation.items utt 'Unit)
       (utt.relation.items utt 'Segment))
      (us_unit_concat utt)
      (if (not (member 'f0 (utt.relationnames utt)))
          (targets_to_f0 utt))
      ;; Pitchmarks run to 0.02 past the end of the last segment.  The end
      ;; point is bound locally rather than assigned to a global variable.
      (let ((pm_end
             (if (utt.relation.last utt 'Segment)
                 (+ (item.feat (utt.relation.last utt 'Segment) "end")
                    0.02)
                 0.02)))
        (us_f0_to_pitchmarks utt 'f0 'TargetCoef pm_end))
      (us_mapping utt 'segment_single)
      (us_generate_wave utt (Parameter.get 'us_sigpr)
                        'analysis_period))
     ((string-equal join_method 'smoothed_lpc)
      (defvar UniSyn_module_hooks nil)
      (Param.def "unisyn.window_name" "hanning")
      (Param.def "unisyn.window_factor" 1.0)
      (Parameter.def 'us_sigpr 'lpc)
      ;; Copy each unit's end time and duration onto the matching segment.
      (mapcar
       (lambda (u s)
         (item.set_feat s "source_end" (item.feat u "end"))
         (item.set_feat s "unit_duration"
                        (- (item.feat u "seg_end") (item.feat u "seg_start"))))
       (utt.relation.items utt 'Unit)
       (utt.relation.items utt 'Segment))
      (us_unit_concat utt)
      ;; After concatenation, copy the frame counts across as well.
      (mapcar
       (lambda (u s)
         (item.set_feat s "num_frames" (item.feat u "num_frames")))
       (utt.relation.items utt 'Unit)
       (utt.relation.items utt 'Segment))
      (if (not (member 'f0 (utt.relationnames utt)))
          (targets_to_f0 utt))
      (let ((pm_end
             (if (utt.relation.last utt 'Segment)
                 (+ (item.feat (utt.relation.last utt 'Segment) "end")
                    0.02)
                 0.02)))
        (us_f0_to_pitchmarks utt 'f0 'TargetCoef pm_end))
      (cl_mapping utt clunits_params)
      (us_generate_wave utt (Parameter.get 'us_sigpr)
                        'analysis_period))
     (t
      (Clunits_Simple_Wave utt)))
    utt))
129
+
130
(define (clunits::units_selected utt filename)
"(clunits::units_selected utt filename)
Output selected unitsfile indexes for each unit in the given utterance.
One line is written per item of the Unit relation, holding the running
end time, the unit name up to its first underscore, the full name, the
fileid and the unit_start, unit_middle and unit_end features.
Results saved in given file name, or stdout if filename is \"-\"."
  (let ((fd (if (string-equal filename "-")
                t
                (fopen filename "w")))
        (end 0)
        (sample_rate
         (cadr (assoc 'sample_rate (wave.info (utt.wave utt))))))
    (format fd "#\n")
    (mapcar
     (lambda (s)
       ;; Duration of this unit in seconds, from its sample offsets.
       (let ((dur (/ (- (item.feat s "samp_end")
                        (item.feat s "samp_start"))
                     sample_rate)))
         (set! end (+ end dur))
         (format fd "%f 125 %s ; %s %10s %f %f %f\n"
                 end
                 (string-before (item.name s) "_")
                 (item.name s)
                 (item.feat s "fileid")
                 (item.feat s "unit_start")
                 (item.feat s "unit_middle")
                 (item.feat s "unit_end"))))
     (utt.relation.items utt 'Unit))
    ;; Only close what was opened here; stdout is left alone.
    (if (not (string-equal filename "-"))
        (fclose fd))
    t))
161
+
162
(define (clunits::units_segs utt filename)
"(clunits::units_segs utt filename)
Saves the unit selections (alone) for display.  One line is written per
item of the Unit relation, holding the running end time and the unit
name up to its first underscore.  Output goes to the given file name, or
to stdout if filename is \"-\"."
  (let ((fd (if (string-equal filename "-")
                t
                (fopen filename "w")))
        (end 0)
        (sample_rate
         (cadr (assoc 'sample_rate (wave.info (utt.wave utt))))))
    (format fd "#\n")
    (mapcar
     (lambda (s)
       ;; Duration of this unit in seconds, from its sample offsets.
       (let ((dur (/ (- (item.feat s "samp_end")
                        (item.feat s "samp_start"))
                     sample_rate)))
         (set! end (+ end dur))
         (format fd "%f 125 %s \n"
                 end
                 (string-before (item.name s) "_"))))
     (utt.relation.items utt 'Unit))
    ;; Only close what was opened here; stdout is left alone.
    (if (not (string-equal filename "-"))
        (fclose fd))
    t))
189
+
190
(define (clunits::fix_segs_durs utt)
"(clunits::fix_segs_durs utt)
Take the actual unit times and place them back on the segments.  Units
and segments are walked in parallel: each segment's end is set to the
accumulated unit duration, and the previous segment's end is moved
forward by the offset of the segment start within the unit.  Returns utt."
  (let ((sample_rate
         (cadr (assoc 'sample_rate (wave.info (utt.wave utt)))))
        (elapsed 0))
    (mapcar
     (lambda (u s)
       (let* ((unit_first (item.feat u "samp_start"))
              (dur (/ (- (item.feat u "samp_end") unit_first)
                      sample_rate))
              (seg_start (/ (- (item.feat u "samp_seg_start") unit_first)
                            sample_rate))
              (prev_seg (item.prev s)))
         (if prev_seg
             (item.set_feat prev_seg "end"
                            (+ (item.feat s "p.end") seg_start)))
         (set! elapsed (+ elapsed dur))
         (item.set_feat s "end" elapsed)))
     (utt.relation.items utt 'Unit)
     (utt.relation.items utt 'Segment))
    utt))
213
+
214
(define (clunits::display utt)
"(clunits::display utt)
Display utterance with emulabel.  Note this saves files in
scratch/wav/ and scratch/lab/ and then starts emulabel in the
background from the scratch directory."
  (let ((id "cl01"))
    (utt.save.wave utt (format nil "scratch/wav/%s.wav" id))
    (utt.save.segs utt (format nil "scratch/lab/%s.lab" id))
    ;; Build the command from id so it always names the files saved above.
    (system (format nil "cd scratch; emulabel ../etc/emu_lab %s &" id))
    t))
223
+
224
+ ; (define (clunits::debug_resynth_units utt)
225
+ ; "(clunits::debug_resynth_units utt)
226
+ ; Check each of the units in utt against the related label
227
+ ; files and re-synth with any given new boundaries. Note this is
228
+ ; will only work if the segment still overlaps with its original and
229
+ ; also note that with a rebuild of the clunits db a complete different
230
+ ; set of units may be selected for this utterance."
231
+ ; (let ()
232
+ ; (mapcar
233
+ ; (lambda (unit)
234
+ ; (clunits::check_unit_boundaries unit))
235
+ ; (utt.relation.items utt 'Unit))
236
+ ; ;; This can't be done like this ...
237
+ ; (Clunits_Get_Units utt) ;; get unit signal/track stuff
238
+ ; (Clunits_Join_Units utt) ;; make a complete waveform
239
+ ; (apply_hooks cluster_synth_post_hooks utt)
240
+ ; utt)
241
+ ; )
242
+
243
(define (clunits::join_parameters utt)
"(clunits::join_parameters utt)
Join selected parameters (rather than the signal), used in F0 and
Articulatory selection.  The coefs track of every item in the Unit
relation is appended to one track, which is stored as the AllCoefs
feature of a single item in a new AllCoefs relation.  Progress is
printed as it goes.  Returns utt."
  (let ((params nil)
        (num_channels 0)
        (num_frames 0))

    ;; First pass: count the frames and pick up the channel count.
    (mapcar
     (lambda (unit)
       (set! num_frames
             (+ num_frames
                (track.num_frames (item.feat unit "coefs"))))
       (set! num_channels (track.num_channels (item.feat unit "coefs")))
       (format t "coounting %d %d\n" num_frames num_channels))
     (utt.relation.items utt 'Unit))

    (set! params (track.resize nil 0 num_channels))

    ;; Second pass: append each unit's frames to the end of params.
    (mapcar
     (lambda (unit)
       (format t "inserting \n")
       (format t "%l %l %l %l %l\n"
               params (track.num_frames params)
               (item.feat unit "coefs") 0
               (track.num_frames (item.feat unit "coefs")))
       (track.insert
        params (track.num_frames params)
        (item.feat unit "coefs") 0
        (track.num_frames (item.feat unit "coefs"))))
     (utt.relation.items utt 'Unit))

    (utt.relation.create utt "AllCoefs")
    ;; The new item is held in a local binding, not a global variable.
    (let ((coefs_item (utt.relation.append utt "AllCoefs")))
      (item.set_feat coefs_item "name" "AllCoefs")
      (item.set_feat coefs_item "AllCoefs" params))

    utt))
285
+
286
+
287
+ (provide 'clunits)
CosyVoice-ttsfrd/resource/festival/clunits_build.scm ADDED
@@ -0,0 +1,479 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2
+ ;;; ;;
3
+ ;;; Carnegie Mellon University and ;;
4
+ ;;; Centre for Speech Technology Research ;;
5
+ ;;; University of Edinburgh, UK ;;
6
+ ;;; Copyright (c) 1998-2005 ;;
7
+ ;;; All Rights Reserved. ;;
8
+ ;;; ;;
9
+ ;;; Permission is hereby granted, free of charge, to use and distribute ;;
10
+ ;;; this software and its documentation without restriction, including ;;
11
+ ;;; without limitation the rights to use, copy, modify, merge, publish, ;;
12
+ ;;; distribute, sublicense, and/or sell copies of this work, and to ;;
13
+ ;;; permit persons to whom this work is furnished to do so, subject to ;;
14
+ ;;; the following conditions: ;;
15
+ ;;; 1. The code must retain the above copyright notice, this list of ;;
16
+ ;;; conditions and the following disclaimer. ;;
17
+ ;;; 2. Any modifications must be clearly marked as such. ;;
18
+ ;;; 3. Original authors' names are not deleted. ;;
19
+ ;;; 4. The authors' names are not used to endorse or promote products ;;
20
+ ;;; derived from this software without specific prior written ;;
21
+ ;;; permission. ;;
22
+ ;;; ;;
23
+ ;;; THE UNIVERSITY OF EDINBURGH, CARNEGIE MELLON UNIVERSITY AND THE ;;
24
+ ;;; CONTRIBUTORS TO THIS WORK DISCLAIM ALL WARRANTIES WITH REGARD TO ;;
25
+ ;;; THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY ;;
26
+ ;;; AND FITNESS, IN NO EVENT SHALL THE UNIVERSITY OF EDINBURGH, CARNEGIE ;;
27
+ ;;; MELLON UNIVERSITY NOR THE CONTRIBUTORS BE LIABLE FOR ANY SPECIAL, ;;
28
+ ;;; INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER ;;
29
+ ;;; RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION ;;
30
+ ;;; OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF ;;
31
+ ;;; OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. ;;
32
+ ;;; ;;
33
+ ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
34
+ ;;;
35
+ ;;; Cluster Unit selection support (Black and Taylor Eurospeech '97)
36
+ ;;;
37
+ ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
38
+ ;;;
39
+ ;;; clunits build support
40
+ ;;;
41
+ ;;; There are five stages to this
42
+ ;;; Load in all utterances
43
+ ;;; Load in their coefficients
44
+ ;;; Collect together the units of the same type
45
+ ;;; build distance tables from them
46
+ ;;; dump features for them
47
+ ;;;
48
+
49
+ (require_module 'clunits) ;; C++ modules support
50
+ (require 'clunits) ;; run time scheme support
51
+
52
+ ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
53
+
54
(define (do_all)
"(do_all)
Run the whole clunits build using the global clunits_params: load the
utterances, group and name the units, dump features, load coefficients,
build distance tables and cluster trees, collect the trees and save the
unit catalogue.  Sets the global variables utterances and unittypes."
  ;; Stage 1: utterances and unit types
  (format t "Loading utterances and sorting types\n")
  (set! utterances (acost:db_utts_load clunits_params))
  (set! unittypes (acost:find_same_types utterances clunits_params))
  (acost:name_units unittypes)

  ;; Stage 2: features
  (format t "Dumping features for clustering\n")
  (acost:dump_features unittypes utterances clunits_params)

  ;; Stage 3: coefficients
  (format t "Loading coefficients\n")
  (acost:utts_load_coeffs utterances)

  ;; Stage 4: distance tables and trees.  This variant suits machines
  ;; that are short of diskspace.
  (acost:disttabs_and_clusters unittypes clunits_params)

  ;; With lots of diskspace the two steps may be run separately instead:
  ;;   (format t "Building distance tables\n")
  ;;   (acost:build_disttabs unittypes clunits_params)
  ;;   ;; Build the cluster trees (requires disttabs and features)
  ;;   (format t "Building cluster trees\n")
  ;;   (acost:find_clusters (mapcar car unittypes) clunits_params)

  ;; Stage 5: tidy up and put things together
  (acost:collect_trees (mapcar car unittypes) clunits_params)

  (format t "Saving unit catalogue\n")
  (acost:save_catalogue utterances clunits_params))
86
+
87
(define (do_init)
"(do_init)
Perform only the first stage of the build: load the utterances named in
the global clunits_params, group the units by type and name them.  Sets
the global variables utterances and unittypes.  Returns t."
  ;; Load every utterance of the database.
  (set! utterances (acost:db_utts_load clunits_params))
  ;; Group the units by type, then give each a unique id.
  (set! unittypes (acost:find_same_types utterances clunits_params))
  (acost:name_units unittypes)
  t)
92
+
93
(define (acost:disttabs_and_clusters unittypes clunits_params)
"(acost:disttabs_and_clusters unittypes clunits_params)
Because the distance tables use so much diskspace, build each table
individually and then its cluster tree, removing the table before moving
on to the next unit type.  Returns t."
  (mapcar
   (lambda (uu)
     (acost:build_disttabs (list uu) clunits_params)
     (acost:find_clusters (list (car uu)) clunits_params)
     ;; Remove the table just used.  The path is built exactly as
     ;; build_tree builds its -distmatrix argument (same defaults, plain
     ;; concatenation) so the file deleted is the file that was read.
     (delete-file
      (string-append
       (get_param 'db_dir clunits_params "./")
       (get_param 'disttabs_dir clunits_params "festival/disttabs/")
       (car uu)
       (get_param 'disttabs_ext clunits_params ".disttab"))))
   unittypes)
  t)
111
+
112
(define (acost:db_utts_load params)
"(acost:db_utts_load params)
Load in all utterances identified in database.  The file ids are taken
from the files entry of PARAMS and each utterance is read from
db_dir + utts_dir + id + utts_ext.  The list of utterances is stored in
the global acost:all_utts and returned."
  (let ((files (car (cdr (assoc 'files params)))))
    (set! acost:all_utts
          (mapcar
           (lambda (fname)
             ;; Hold the utterance in a local binding rather than a
             ;; global variable while it is being loaded.
             (let ((utt_seg (Utterance Text fname)))
               (utt.load utt_seg
                         (string-append
                          (get_param 'db_dir params "./")
                          (get_param 'utts_dir params "festival/utts/")
                          fname
                          (get_param 'utts_ext params ".utt")))
               utt_seg))
           files))))
128
+
129
(define (acost:utts_load_coeffs utterances)
"(acost:utts_load_coeffs utterances)
Load the acoustic coefficients for each utterance in UTTERANCES, using
the global clunits_params.  Returns t."
  (let ((load_one
         (lambda (u)
           (acost:utt.load_coeffs u clunits_params))))
    (mapcar load_one utterances))
  t)
136
+
137
(define (acost:find_same_types utterances params)
"(acost:find_same_types utterances params)
Find all the items of the same type and collect them into lists of that
type.  Items come from the relation named by clunit_relation (default
Segment) and are typed by the feature named by clunit_name_feat (default
name); items whose type is ignore are skipped.  Each item gets its type
recorded as its clunit_name feature.  The result is built in the global
acost:unittypes and returned after acost:prune_unittypes."
  (let ((clunit_name_feat (get_param 'clunit_name_feat params "name"))
        (clunit_relation (get_param 'clunit_relation params "Segment")))
    (set! acost:unittypes nil)
    (let ((file_item
           (lambda (s)
             (if (not (string-equal "ignore" (item.feat s clunit_name_feat)))
                 (begin
                   (item.set_feat s "clunit_name"
                                  (item.feat s clunit_name_feat))
                   (let ((bucket
                          (assoc (item.feat s "clunit_name")
                                 acost:unittypes)))
                     (if bucket
                         ;; Known type: push onto its item list.
                         (set-cdr! bucket (cons s (cdr bucket)))
                         ;; New type: start a fresh entry.
                         (set! acost:unittypes
                               (cons
                                (list (item.feat s "clunit_name") s)
                                acost:unittypes)))))))))
      (mapcar
       (lambda (u)
         (mapcar file_item (utt.relation.items u clunit_relation)))
       utterances))
    (acost:prune_unittypes acost:unittypes params)))
162
+
163
(define (acost:prune_unittypes unittypes params)
"(acost:prune_unittypes unittypes params)
If unit types are complex (the name of the first type contains an _),
keep only the types that have more than unittype_prune_threshold
members.  A threshold of 0 (the default) keeps everything.  Simple type
lists, and an empty list, are returned unchanged."
  (if (and unittypes  ;; nothing to inspect when there are no types
           (string-matches (car (car unittypes)) ".*_.*"))
      (let ((kept nil)
            (pt (get_param 'unittype_prune_threshold params 0)))
        (while unittypes
          (if (or (eq? pt 0)
                  (> (length (cdr (car unittypes))) pt))
              (set! kept (cons (car unittypes) kept)))
          (set! unittypes (cdr unittypes)))
        ;; kept was built back to front
        (reverse kept))
      unittypes))
176
+
177
(define (acost:name_units unittypes)
"(acost:name_units unittypes)
Give each unit a unique id (feature unitid, counted across all types)
and number the occurrences within each type (feature occurid).  Prints
the number of units per type and the overall total, which is returned."
  (let ((next_id 0)
        (next_occ 0))
    (mapcar
     (lambda (utype)
       ;; Occurrence numbering restarts for every type.
       (set! next_occ 0)
       (mapcar
        (lambda (unit)
          (item.set_feat unit "unitid" next_id)
          (set! next_id (+ 1 next_id))
          (item.set_feat unit "occurid" next_occ)
          (set! next_occ (+ 1 next_occ)))
        (cdr utype))
       (format t "units \"%s\" %d\n" (car utype) next_occ))
     unittypes)
    (format t "total units %d\n" next_id)
    next_id))
195
+
196
+ ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
197
+ ;;; Generating feature files
198
+
199
(define (acost:dump_features unittypes utterances params)
"(acost:dump_features unittypes utterances params)
Dump the desired features for every unit type, one call to
acost:dump_features_utype per entry of UNITTYPES.  Each entry is a type
name followed by the items of that type.  Returns t."
  (let ((dump_one
         (lambda (utype)
           (let ((type_name (car utype))
                 (type_items (cdr utype)))
             (acost:dump_features_utype
              type_name
              type_items
              utterances
              params)))))
    (mapcar dump_one unittypes))
  t)
213
+
214
(define (acost:dump_features_utype utype uitems utterances params)
"(acost:dump_features_utype utype uitems utterances params)
Dump features for all items of type UTYPE.  One line is written per item
in UITEMS, holding the value of each feature listed in the feats entry
of PARAMS, to the file db_dir + feats_dir + UTYPE + feats_ext."
  (let ((fd (fopen
             (string-append
              (get_param 'db_dir params "./")
              (get_param 'feats_dir params "festival/feats/")
              utype
              (get_param 'feats_ext params ".feats"))
             "w"))
        (feats (car (cdr (assoc 'feats params)))))
    (format t "Dumping features for %s\n" utype)
    (mapcar
     (lambda (s)
       (mapcar
        (lambda (f)
          ;; The value is held in a local binding rather than a global.
          (let ((fval (unwind-protect (item.feat s f) "0")))
            ;; Empty and single-space values are written with %l, all
            ;; other values with %s.
            (if (or (string-equal "" fval)
                    (string-equal " " fval))
                (format fd "%l " fval)
                (format fd "%s " fval))))
        feats)
       (format fd "\n"))
     uitems)
    (fclose fd)))
239
+
240
+ ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
241
+ ;;; Tree building functions
242
+
243
+ (defvar wagon-balance-size 0)
244
+
245
(define (acost:find_clusters unittypes clunits_params)
"(acost:find_clusters unittypes clunits_params)
Use wagon to find the best clusters: build_tree is called once for each
unit type name in UNITTYPES.  Returns t."
  (let ((cluster_one
         (lambda (type_name)
           (build_tree type_name clunits_params))))
    (mapcar cluster_one unittypes))
  t)
252
+
253
(define (build_tree unittype clunits_params)
"(build_tree unittype clunits_params)
Build tree with Wagon for this unittype.  The wagon command line is
assembled from CLUNITS_PARAMS, printed, and run through system.  If a
description file specific to the type exists (the wagon_field_desc path
followed by a dot and UNITTYPE) it is used instead of the general one."
  (let* ((db_dir (get_param 'db_dir clunits_params "./"))
         (desc_general
          (string-append
           db_dir
           (get_param 'wagon_field_desc clunits_params "wagon")))
         ;; So there can be unittype specific desc files
         (desc_specific (string-append desc_general "." unittype))
         (desc_file
          (if (probe_file desc_specific)
              desc_specific
              desc_general))
         (feats_file
          (string-append
           db_dir
           (get_param 'feats_dir clunits_params "festival/feats/")
           unittype
           (get_param 'feats_ext clunits_params ".feats")))
         (disttab_file
          (string-append
           db_dir
           (get_param 'disttabs_dir clunits_params "festival/disttabs/")
           unittype
           (get_param 'disttabs_ext clunits_params ".disttab")))
         (tree_file
          (string-append
           db_dir
           (get_param 'trees_dir clunits_params "festival/trees/")
           unittype
           (get_param 'trees_ext clunits_params ".tree")))
         (command
          (format nil "%s -desc %s -data '%s' -balance %s -distmatrix '%s' -stop %s -output '%s' %s"
                  (get_param 'wagon_progname clunits_params "wagon")
                  desc_file
                  feats_file
                  (get_param 'wagon_balance_size clunits_params 0)
                  disttab_file
                  (get_param 'wagon_cluster_size clunits_params 10)
                  tree_file
                  (get_param 'wagon_other_params clunits_params ""))))
    (format t "%s\n" command)
    (system command)))
292
+
293
+ (defvar clunits_tree_minimum_leafs 0)
294
(define (acost:collect_trees unittypes params)
"(acost:collect_trees unittypes params)
Collect the trees into one file as an assoc list.  The tree of each type
named in UNITTYPES is loaded from the trees directory, optionally pruned
(cluster_prune_limit) and merged (cluster_merge), and written to the
index file as part of a form that sets clunits_selection_trees.  Trees
with no more than clunits_tree_minimum_leafs units are left out.  The
parameters used are recorded as comments at the top of the file."
  (let* ((tree_pref
          (string-append
           (get_param 'db_dir params "./")
           (get_param 'trees_dir params "festival/trees/")))
         (trees_ext (get_param 'trees_ext params ".tree"))
         (fd (fopen
              (string-append
               tree_pref
               (get_param 'index_name params "all.")
               trees_ext)
              "wb"))
         (cluster_prune_limit (get_param 'cluster_prune_limit params 0))
         (cluster_merge (get_param 'cluster_merge params 0)))
    (format fd ";; Autogenerated list of selection trees\n")
    (mapcar
     (lambda (fp)
       (format fd ";; %l %l\n" (car fp) (car (cdr fp))))
     params)
    (format fd "(set! clunits_selection_trees '(\n")
    (mapcar
     (lambda (unit)
       ;; Load with the same extension build_tree used when writing the
       ;; tree, so a non-default trees_ext is honoured here too.
       ;; NOTE: tree is deliberately left as a global assignment because
       ;; temp_tree_convert is called below without arguments and so
       ;; presumably reads it -- confirm before making it local.
       (set! tree (car (load (string-append tree_pref unit trees_ext) t)))
       (if (> cluster_prune_limit 0)
           (set! tree (cluster_tree_prune tree cluster_prune_limit)))
       (if (> cluster_merge 0)
           (set! tree (tree_merge_leafs tree cluster_merge)))
       (if (boundp 'temp_tree_convert)
           (set! tree (temp_tree_convert)))
       (if (> (tree_num_units tree) clunits_tree_minimum_leafs)
           (pprintf (list unit tree) fd)))
     unittypes)
    (format fd "))\n")
    (fclose fd)))
329
+
330
+ (define (cluster_tree_prune_in_line prune_limit)
331
+ "(cluster_tree_prune_in_line prune_limit)
332
+ Prune number of units in each cluster in each tree of sucs_select_trees *by*
333
+ prune_limit, if negative, or *to* prune_limit, if positive."
334
+ (set! sucs_select_trees
335
+ (mapcar
336
+ (lambda (t)
337
+ (cluster_tree_prune t prune_limit))
338
+ sucs_select_trees)))
339
+
340
+ (define (tree_merge_leafs tree depth)
341
+ "(tree_merge_leafs tree depth)
342
+ Collapse every subtree with fewer than DEPTH leafs into a single leaf. This
343
+ allows the trees to be pruned then the single leafs joined together into larger
344
+ clusters (so the viterbi part has something to do)."
345
+ (let ((num_leafs (tree_num_leafs tree)))
346
+ (cond
347
+ ((< num_leafs 2) tree) ;; already at the foot
348
+ ((< num_leafs depth) ;; small enough: join all leafs below here
349
+ (tree_collect_leafs tree))
350
+ (t ;; keep the question and recurse into both branches
351
+ (list
352
+ (car tree)
353
+ (tree_merge_leafs (car (cdr tree)) depth)
354
+ (tree_merge_leafs (car (cdr (cdr tree))) depth))))))
355
+
356
+ (define (tree_num_leafs tree)
357
+ "(tree_num_leafs tree)
358
+ Number of leafs of given tree."
359
+ (cond
360
+ ((cdr tree)
361
+ (+
362
+ (tree_num_leafs (car (cdr tree)))
363
+ (tree_num_leafs (car (cdr (cdr tree))))))
364
+ (t
365
+ 1)))
366
+
367
+ (define (tree_num_units tree)
368
+ "(tree_num_units tree)
369
+ Total number of units in the leafs of given tree."
370
+ (cond
371
+ ((cdr tree) ;; a question: sum over both subtrees
372
+ (+
373
+ (tree_num_units (car (cdr tree)))
374
+ (tree_num_units (car (cdr (cdr tree))))))
375
+ (t ;; a leaf: count the items in its unit list
376
+ (length (caar tree))
377
+ )))
378
+
379
+ (define (tree_collect_leafs tree)
380
+ "(tree_collect_leafs tree)
381
+ Combine all units in the leafs."
382
+ (cond
383
+ ((cdr tree)
384
+ (let ((a (tree_collect_leafs (car (cdr tree))))
385
+ (b (tree_collect_leafs (car (cdr (cdr tree))))))
386
+ (list
387
+ (list
388
+ (append
389
+ (caar a)
390
+ (caar b))
391
+ 10.0))))
392
+ (t
393
+ tree)))
394
+
395
+ (define (cluster_tree_prune tree prune_limit)
396
+ "(cluster_tree_prune TREE PRUNE_LIMIT)
397
+ Reduce the number of elements in the (CART) tree leaves *by* -PRUNE_LIMIT
398
+ if PRUNE_LIMIT is negative, otherwise *to* PRUNE_LIMIT, removing the ones
399
+ further from the cluster centre. Maybe later this should have guards on
400
+ minimum number of units that must remain in the tree and a per unit type limit."
401
+ (cond
402
+ ((cdr tree) ;; a question
403
+ (list
404
+ (car tree)
405
+ (cluster_tree_prune (car (cdr tree)) prune_limit)
406
+ (cluster_tree_prune (car (cdr (cdr tree))) prune_limit)))
407
+ (t ;; tree leaf
408
+ (list
409
+ (list
410
+ (remove_n_worst
411
+ (car (car tree))
412
+ (if (< prune_limit 0)
413
+ (* -1 prune_limit)
414
+ (- (length (car (car tree))) prune_limit)))
415
+ (car (cdr (car tree))))))))
416
+
417
+ (define (remove_n_worst lll togo)
418
+ "(remove_n_worst lll togo)
419
+ Remove togo worst items from lll."
420
+ (cond
421
+ ((< togo 0)
422
+ lll)
423
+ ((equal? 0 togo)
424
+ lll)
425
+ (t
426
+ (remove_n_worst
427
+ (remove (worst_unit (cdr lll) (car lll)) lll)
428
+ (- togo 1)))))
429
+
430
+ (define (worst_unit lll worst_so_far)
431
+ "(worst_unit lll worst_so_far)
432
+ Returns unit with worst score in list."
433
+ (cond
434
+ ((null lll)
435
+ worst_so_far)
436
+ ((< (car (cdr worst_so_far)) (car (cdr (car lll))))
437
+ (worst_unit (cdr lll) (car lll)))
438
+ (t
439
+ (worst_unit (cdr lll) worst_so_far))))
440
+
441
+ ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
442
+ ;;; Save the unit catalogue for use in the run-time index
443
+
444
+ (define (acost:save_catalogue utterances clunits_params)
445
+ "(acost:save_catalogue utterances clunits_params)
446
+ Save the catalogue with named units with times."
447
+ (let ((fd (fopen
448
+ (string-append
449
+ (get_param 'db_dir clunits_params "./")
450
+ (get_param 'catalogue_dir clunits_params "trees/")
451
+ (get_param 'index_name clunits_params "catalogue.")
452
+ ".catalogue")
453
+ "wb"))
454
+ (num_units 0)
455
+ )
456
+ (format fd "EST_File index\n")
457
+ (format fd "DataType ascii\n")
458
+ (format fd "NumEntries %d\n"
459
+ (apply
460
+ + (mapcar (lambda (u)
461
+ (length (utt.relation.items u 'Segment))) utterances)))
462
+ (format fd "IndexName %s\n" (get_param 'index_name clunits_params "cluser"))
463
+ (format fd "EST_Header_End\n")
464
+ (mapcar
465
+ (lambda (u)
466
+ (mapcar
467
+ (lambda (s)
468
+ (format fd "%s_%s %s %f %f %f\n"
469
+ (item.feat s "clunit_name")
470
+ (item.feat s 'occurid)
471
+ (utt.feat u 'fileid)
472
+ (item.feat s 'segment_start)
473
+ (item.feat s 'segment_mid)
474
+ (item.feat s 'segment_end)))
475
+ (utt.relation.items u 'Segment)))
476
+ utterances)
477
+ (fclose fd)))
478
+
479
+ (provide 'clunits_build.scm)
CosyVoice-ttsfrd/resource/festival/cmusphinx2_phones.scm ADDED
@@ -0,0 +1,119 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2
+ ;;; ;;;
3
+ ;;; Carnegie Mellon University ;;;
4
+ ;;; and Alan W Black and Kevin Lenzo ;;;
5
+ ;;; Copyright (c) 1998-2000 ;;;
6
+ ;;; All Rights Reserved. ;;;
7
+ ;;; ;;;
8
+ ;;; Permission is hereby granted, free of charge, to use and distribute ;;;
9
+ ;;; this software and its documentation without restriction, including ;;;
10
+ ;;; without limitation the rights to use, copy, modify, merge, publish, ;;;
11
+ ;;; distribute, sublicense, and/or sell copies of this work, and to ;;;
12
+ ;;; permit persons to whom this work is furnished to do so, subject to ;;;
13
+ ;;; the following conditions: ;;;
14
+ ;;; 1. The code must retain the above copyright notice, this list of ;;;
15
+ ;;; conditions and the following disclaimer. ;;;
16
+ ;;; 2. Any modifications must be clearly marked as such. ;;;
17
+ ;;; 3. Original authors' names are not deleted. ;;;
18
+ ;;; 4. The authors' names are not used to endorse or promote products ;;;
19
+ ;;; derived from this software without specific prior written ;;;
20
+ ;;; permission. ;;;
21
+ ;;; ;;;
22
+ ;;; CARNEGIE MELLON UNIVERSITY AND THE CONTRIBUTORS TO THIS WORK ;;;
23
+ ;;; DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ;;;
24
+ ;;; ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT ;;;
25
+ ;;; SHALL CARNEGIE MELLON UNIVERSITY NOR THE CONTRIBUTORS BE LIABLE ;;;
26
+ ;;; FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES ;;;
27
+ ;;; WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN ;;;
28
+ ;;; AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ;;;
29
+ ;;; ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF ;;;
30
+ ;;; THIS SOFTWARE. ;;;
31
+ ;;; ;;;
32
+ ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
33
+ ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
34
+ ;;;
35
+ ;;; A definition of the cmusphinx2 phone set used in the BU RADIO FM
36
+ ;;; corpus, some people call this the darpa set. This one
37
+ ;;; has the closures removed
38
+ ;;;
39
+
40
+ (defPhoneSet
41
+ cmusphinx2
42
+ ;;; Phone Features
43
+ (;; vowel or consonant
44
+ (vc + -)
45
+ ;; vowel length: short long diphthong schwa
46
+ (vlng s l d a 0)
47
+ ;; vowel height: high mid low
48
+ (vheight 1 2 3 0)
49
+ ;; vowel frontness: front mid back
50
+ (vfront 1 2 3 0)
51
+ ;; lip rounding
52
+ (vrnd + - 0)
53
+ ;; consonant type: stop fricative affricate nasal lateral approximant
54
+ (ctype s f a n l r 0)
55
+ ;; place of articulation: labial alveolar palatal labio-dental
56
+ ;; dental velar glottal
57
+ (cplace l a p b d v g 0)
58
+ ;; consonant voicing
59
+ (cvox + - 0)
60
+ )
61
+ ;; Phone set members
62
+ (
63
+
64
+ ;; Note these features were set by awb so they are wrong !!!
65
+
66
+ ; phone vc vl vh vf vr ct cp cv
67
+ (AA + l 3 3 - 0 0 0) ;; father
68
+ (AE + s 3 1 - 0 0 0) ;; fat
69
+ (AH + s 2 2 - 0 0 0) ;; but
70
+ (AO + l 3 3 + 0 0 0) ;; lawn
71
+ (AW + d 3 2 - 0 0 0) ;; how
72
+ (AX + a 2 2 - 0 0 0) ;; about
73
+ (AXR + a 2 2 - r a +)
74
+ (AY + d 3 2 - 0 0 0) ;; hide
75
+ (B - 0 0 0 0 s l +)
76
+ (CH - 0 0 0 0 a p -)
77
+ (D - 0 0 0 0 s a +)
78
+ (DH - 0 0 0 0 f d +)
79
+ (DX - 0 0 0 0 s a +)
80
+ (EH + s 2 1 - 0 0 0) ;; get
81
+ (ER + a 2 2 - r 0 0)
82
+ (EY + d 2 1 - 0 0 0) ;; gate
83
+ (F - 0 0 0 0 f b -)
84
+ (G - 0 0 0 0 s v +)
85
+ (HH - 0 0 0 0 f g -)
86
+ (IH + s 1 1 - 0 0 0) ;; bit
87
+ (IY + l 1 1 - 0 0 0) ;; beet
88
+ (JH - 0 0 0 0 a p +)
89
+ (K - 0 0 0 0 s v -)
90
+ (L - 0 0 0 0 l a +)
91
+ (M - 0 0 0 0 n l +)
92
+ (N - 0 0 0 0 n a +)
93
+ (NG - 0 0 0 0 n v +)
94
+ (OW + d 2 3 + 0 0 0) ;; lone
95
+ (OY + d 2 3 + 0 0 0) ;; toy
96
+ (P - 0 0 0 0 s l -)
97
+ (R - 0 0 0 0 r a +)
98
+ (S - 0 0 0 0 f a -)
99
+ (SH - 0 0 0 0 f p -)
100
+ (T - 0 0 0 0 s a -)
101
+ (TH - 0 0 0 0 f d -)
102
+ (UH + s 1 3 + 0 0 0) ;; full
103
+ (UW + l 1 3 + 0 0 0) ;; fool
104
+ (V - 0 0 0 0 f b +)
105
+ (W - 0 0 0 0 r l +)
106
+ (Y - 0 0 0 0 r p +)
107
+ (Z - 0 0 0 0 f a +)
108
+ (ZH - 0 0 0 0 f p +)
109
+ (SIL - 0 0 0 0 0 0 -) ; added
110
+ )
111
+ )
112
+
113
+ (PhoneSet.silences '(SIL))
114
+
115
+ (provide 'cmusphinx2_phones)
116
+
117
+
118
+
119
+
CosyVoice-ttsfrd/resource/festival/cslush.scm ADDED
@@ -0,0 +1,79 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2
+ ;;; ;;
3
+ ;;; Centre for Speech Technology Research ;;
4
+ ;;; University of Edinburgh, UK ;;
5
+ ;;; Copyright (c) 1996,1997 ;;
6
+ ;;; All Rights Reserved. ;;
7
+ ;;; ;;
8
+ ;;; Permission is hereby granted, free of charge, to use and distribute ;;
9
+ ;;; this software and its documentation without restriction, including ;;
10
+ ;;; without limitation the rights to use, copy, modify, merge, publish, ;;
11
+ ;;; distribute, sublicense, and/or sell copies of this work, and to ;;
12
+ ;;; permit persons to whom this work is furnished to do so, subject to ;;
13
+ ;;; the following conditions: ;;
14
+ ;;; 1. The code must retain the above copyright notice, this list of ;;
15
+ ;;; conditions and the following disclaimer. ;;
16
+ ;;; 2. Any modifications must be clearly marked as such. ;;
17
+ ;;; 3. Original authors' names are not deleted. ;;
18
+ ;;; 4. The authors' names are not used to endorse or promote products ;;
19
+ ;;; derived from this software without specific prior written ;;
20
+ ;;; permission. ;;
21
+ ;;; ;;
22
+ ;;; THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK ;;
23
+ ;;; DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ;;
24
+ ;;; ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT ;;
25
+ ;;; SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE ;;
26
+ ;;; FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES ;;
27
+ ;;; WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN ;;
28
+ ;;; AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ;;
29
+ ;;; ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF ;;
30
+ ;;; THIS SOFTWARE. ;;
31
+ ;;; ;;
32
+ ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
33
+ ;;;
34
+ ;;; Functions specific to using Festival in cslush part of the OGI toolkit
35
+ ;;; The OGI toolkit is a complete dialog building system with speech
36
+ ;;; recognition and synthesis (Festival) it is available for free for
37
+ ;;; research purposes from
38
+ ;;; http://www.cse.ogi.edu/CSLU/toolkit/toolkit.html
39
+ ;;;
40
+ ;;; Note this cslush interface requires you to compile festival
41
+ ;;; with tcl (7.6)
42
+ ;;;
43
+ ;;; The functions replace the C++ level functions Jacques H. de Villiers
44
+ ;;; <[email protected]> from CSLU wrote for the previous version
45
+ ;;;
46
+
47
+ (if (not (member 'tcl *modules*))
48
+ (error "cslush: can't load cslush, TCL not supported in this installation of Festival."))
49
+
50
+ (define (cslush.getwave utt)
51
+ "(cslush.getwave UTT)
52
+ Extract wave memory info, pass this to wave import in CSLUsh."
53
+ (format nil "%s %s %s"
54
+ (utt.wave.info utt 'data_addr)
55
+ (utt.wave.info utt 'num_samples)
56
+ (utt.wave.info utt 'sample_rate)))
57
+
58
+ (define (cslush.getphone utt)
59
+ "(cslush.getphone UTT)
60
+ Return the segment names as a single space-separated string of phones,
61
+ for passing to TCL."
62
+ (let ((phones ""))
63
+ (mapcar
64
+ (lambda (s)
65
+ (if (string-equal phones "")
66
+ (set! phones (format nil "%s" (utt.streamitem.feat utt s 'name)))
67
+ (set! phones (format nil "%s %s"
68
+ phones (utt.streamitem.feat utt s 'name)))))
69
+ (utt.stream utt 'Segment))
70
+ phones))
71
+
72
+ (define (cslush TCLCOMMAND)
73
+ "(cslush TCLCOMMAND)
74
+ Pass TCLCOMMAND to TCL interpreter, returns what TCL returns as a
75
+ string."
76
+ (tcl_eval TCLCOMMAND))
77
+
78
+
79
+ (provide 'cslush)
CosyVoice-ttsfrd/resource/festival/cstr.scm ADDED
@@ -0,0 +1,121 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+
3
+
4
+
5
+ ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6
+ ;;; DO NOT EDIT THIS FILE ON PAIN OF MORE PAIN.
7
+ ;;;
8
+ ;;; The master copy of this file is in ../../speech_tools/lib/siod/cstr.scm
9
+ ;;; and is copied here at build time.
10
+ ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
11
+
12
+
13
+
14
+
15
+
16
+
17
+
18
+
19
+
20
+
21
+
22
+
23
+ ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
24
+ ;;; ;;
25
+ ;;; Centre for Speech Technology Research ;;
26
+ ;;; University of Edinburgh, UK ;;
27
+ ;;; Copyright (c) 1996,1997 ;;
28
+ ;;; All Rights Reserved. ;;
29
+ ;;; ;;
30
+ ;;; Permission is hereby granted, free of charge, to use and distribute ;;
31
+ ;;; this software and its documentation without restriction, including ;;
32
+ ;;; without limitation the rights to use, copy, modify, merge, publish, ;;
33
+ ;;; distribute, sublicense, and/or sell copies of this work, and to ;;
34
+ ;;; permit persons to whom this work is furnished to do so, subject to ;;
35
+ ;;; the following conditions: ;;
36
+ ;;; 1. The code must retain the above copyright notice, this list of ;;
37
+ ;;; conditions and the following disclaimer. ;;
38
+ ;;; 2. Any modifications must be clearly marked as such. ;;
39
+ ;;; 3. Original authors' names are not deleted. ;;
40
+ ;;; 4. The authors' names are not used to endorse or promote products ;;
41
+ ;;; derived from this software without specific prior written ;;
42
+ ;;; permission. ;;
43
+ ;;; ;;
44
+ ;;; THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK ;;
45
+ ;;; DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ;;
46
+ ;;; ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT ;;
47
+ ;;; SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE ;;
48
+ ;;; FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES ;;
49
+ ;;; WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN ;;
50
+ ;;; AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ;;
51
+ ;;; ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF ;;
52
+ ;;; THIS SOFTWARE. ;;
53
+ ;;; ;;
54
+ ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
55
+ ;;;
56
+ ;;; CSTR siod extensions.
57
+
58
+ ;(defvar Parameter nil
59
+ ; "Parameter
60
+ ; An assoc-list of parameters and values for various parts of the speech
61
+ ; synthesis system. This is used by the functions Parameter.set
62
+ ; Parameter.def and Parameter.get as well as internal C++ functions.")
63
+
64
+ (defvar Param (feats.make)
65
+ "Param
66
+ A feature set for arbitrary parameters for modules.")
67
+
68
+ (define (Param.set name val)
69
+ "(Param.set NAME VAL)
70
+ Set parameter NAME to VAL (deleting any previous setting)"
71
+ (feats.set Param name val))
72
+
73
+ (define (Parameter.set name val)
74
+ "(Parameter.set NAME VAL)
75
+ Set parameter NAME to VAL (deleting any previous setting). This is
76
+ an old function and you should use Param.set instead."
77
+ (Param.set name val)
78
+ val
79
+ )
80
+
81
+ (define (Parameter.def name val)
82
+ "(Parameter.def NAME VAL)
83
+ Set parameter NAME to VAL if not already set. This is an OLD function
84
+ you should use Param.def instead."
85
+ (Param.def name val)
86
+ )
87
+
88
+ (define (Param.def name val)
89
+ "(Param.def NAME VAL)
90
+ Set parameter NAME to VAL if not already set"
91
+ (if (not (feats.present Param name))
92
+ (feats.set Param name val)))
93
+
94
+ (define (Parameter.get name)
95
+ "(Parameter.get NAME)
96
+ Get parameter NAME's value (nil if unset). This is an OLD function
97
+ and may not exist in later versions (or change functionality). This
98
+ function (unlike Param.get) may return symbols (rather than strings)
99
+ if the val doesn't contain whitespace (to allow older functions to
100
+ still work)."
101
+ (let ((val (Param.get name)))
102
+ (if (and (eq? 'string (typeof val))
103
+ (not (string-matches val ".*[ \t\r\n].*")))
104
+ (intern val)
105
+ val))
106
+ )
107
+
108
+ (define (Param.get name)
109
+ "(Param.get NAME)
110
+ Get parameter NAME's value (nil if unset)"
111
+ (feats.get Param name))
112
+
113
+ (define (get_param name params default)
114
+ "(get_param name params default)
115
+ Get the value of the named parameter in the assoc list params, returning default if it is not present."
116
+ (let ((pair (assoc name params)))
117
+ (if pair
118
+ (car (cdr pair))
119
+ default)))
120
+
121
+ (provide 'cstr)
CosyVoice-ttsfrd/resource/festival/darpa_phones.scm ADDED
@@ -0,0 +1,115 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2
+ ;;; ;;
3
+ ;;; Centre for Speech Technology Research ;;
4
+ ;;; University of Edinburgh, UK ;;
5
+ ;;; Copyright (c) 1999 ;;
6
+ ;;; All Rights Reserved. ;;
7
+ ;;; ;;
8
+ ;;; Permission is hereby granted, free of charge, to use and distribute ;;
9
+ ;;; this software and its documentation without restriction, including ;;
10
+ ;;; without limitation the rights to use, copy, modify, merge, publish, ;;
11
+ ;;; distribute, sublicense, and/or sell copies of this work, and to ;;
12
+ ;;; permit persons to whom this work is furnished to do so, subject to ;;
13
+ ;;; the following conditions: ;;
14
+ ;;; 1. The code must retain the above copyright notice, this list of ;;
15
+ ;;; conditions and the following disclaimer. ;;
16
+ ;;; 2. Any modifications must be clearly marked as such. ;;
17
+ ;;; 3. Original authors' names are not deleted. ;;
18
+ ;;; 4. The authors' names are not used to endorse or promote products ;;
19
+ ;;; derived from this software without specific prior written ;;
20
+ ;;; permission. ;;
21
+ ;;; ;;
22
+ ;;; THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK ;;
23
+ ;;; DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ;;
24
+ ;;; ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT ;;
25
+ ;;; SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE ;;
26
+ ;;; FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES ;;
27
+ ;;; WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN ;;
28
+ ;;; AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ;;
29
+ ;;; ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF ;;
30
+ ;;; THIS SOFTWARE. ;;
31
+ ;;; ;;
32
+ ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
33
+ ;;; Author: Alan W Black
34
+ ;;; Date: April 1999
35
+ ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
36
+ ;;;
37
+ ;;; (yet another) darpa definition
38
+ ;;;
39
+
40
+ (require 'phoneset)
41
+
42
+ (set! darpa_fs (cadr
43
+ (defPhoneSet
44
+ darpa
45
+ (Features
46
+ (vowel (syllabic + -)
47
+ (length long short diphthong schwa)
48
+ (height high mid low)
49
+ (front front mid back)
50
+ (round + -))
51
+ (consonant
52
+ (syllabic + -)
53
+ (manner stop affricate fricative approximant nasal)
54
+ (place alveolar dental labial palatal velar)
55
+ (voicing + -))
56
+ (silence
57
+ (syllabic -)))
58
+ (Phones
59
+ ;; type syl length height front round
60
+ (aa vowel + long low back -)
61
+ (ae vowel + short low front -)
62
+ (ah vowel + short mid mid -)
63
+ (ao vowel + long low front +)
64
+ (aw vowel + diphthong low mid -)
65
+ (ax vowel + schwa mid mid -)
66
+ (axr vowel + schwa mid mid -)
67
+ (ay vowel + diphthong low mid -)
68
+ (eh vowel + short mid front -)
69
+ (ey vowel + diphthong mid front -)
70
+ (ih vowel + short high front -)
71
+ (iy vowel + long high front -)
72
+ (ow vowel + diphthong mid back +)
73
+ (oy vowel + diphthong mid back +)
74
+ (uh vowel + short high back +)
75
+ (uw vowel + long high back +)
76
+ ;; type syl manner place voicing
77
+ (b consonant - stop labial +)
78
+ (ch consonant - affricate alveolar -)
79
+ (d consonant - stop alveolar +)
80
+ (dh consonant - fricative dental +)
81
+ (dx consonant - stop alveolar +)
82
+ (el consonant + approximant alveolar +)
83
+ (em consonant + nasal labial +)
84
+ (en consonant + stop alveolar +)
85
+ (er consonant + approximant alveolar +)
86
+ (f consonant - fricative labial -)
87
+ (g consonant - stop velar +)
88
+ (hh consonant - fricative velar -)
89
+ (jh consonant - affricate alveolar +)
90
+ (k consonant - stop velar -)
91
+ (l consonant - approximant alveolar +)
92
+ (m consonant - nasal labial +)
93
+ (n consonant - nasal alveolar +)
94
+ (nx consonant - nasal alveolar +)
95
+ (ng consonant - nasal velar +)
96
+ (p consonant - stop labial -)
97
+ (r consonant - approximant alveolar +)
98
+ (s consonant - fricative alveolar -)
99
+ (sh consonant - fricative palatal -)
100
+ (t consonant - stop alveolar -)
101
+ (th consonant - fricative dental -)
102
+ (v consonant - fricative labial +)
103
+ (w consonant - approximant velar +)
104
+ (y consonant - approximant palatal +)
105
+ (z consonant - fricative alveolar +)
106
+ (zh consonant - fricative palatal +)
107
+ (pau silence -)
108
+ ; (sil silence -)
109
+ ))))
110
+
111
+ (provide 'darpa_phones)
112
+
113
+
114
+
115
+
CosyVoice-ttsfrd/resource/festival/display.scm ADDED
@@ -0,0 +1,69 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2
+ ;;; ;;
3
+ ;;; Centre for Speech Technology Research ;;
4
+ ;;; University of Edinburgh, UK ;;
5
+ ;;; Copyright (c) 1996,1997 ;;
6
+ ;;; All Rights Reserved. ;;
7
+ ;;; ;;
8
+ ;;; Permission is hereby granted, free of charge, to use and distribute ;;
9
+ ;;; this software and its documentation without restriction, including ;;
10
+ ;;; without limitation the rights to use, copy, modify, merge, publish, ;;
11
+ ;;; distribute, sublicense, and/or sell copies of this work, and to ;;
12
+ ;;; permit persons to whom this work is furnished to do so, subject to ;;
13
+ ;;; the following conditions: ;;
14
+ ;;; 1. The code must retain the above copyright notice, this list of ;;
15
+ ;;; conditions and the following disclaimer. ;;
16
+ ;;; 2. Any modifications must be clearly marked as such. ;;
17
+ ;;; 3. Original authors' names are not deleted. ;;
18
+ ;;; 4. The authors' names are not used to endorse or promote products ;;
19
+ ;;; derived from this software without specific prior written ;;
20
+ ;;; permission. ;;
21
+ ;;; ;;
22
+ ;;; THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK ;;
23
+ ;;; DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ;;
24
+ ;;; ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT ;;
25
+ ;;; SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE ;;
26
+ ;;; FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES ;;
27
+ ;;; WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN ;;
28
+ ;;; AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ;;
29
+ ;;; ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF ;;
30
+ ;;; THIS SOFTWARE. ;;
31
+ ;;; ;;
32
+ ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
33
+ ;;; Author: Alan W Black
34
+ ;;; Date: December 1996
35
+ ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
36
+ ;;; An xwaves display function for utterances
37
+ ;;;
38
+ ;;; Requires Xwaves to be running, saves labels etc and sends
39
+ ;;; messages to Xwaves to display the utterance.
40
+ ;;;
41
+ ;;; This can be a model for other display functions.
42
+ ;;;
43
+
44
+ (define (display utt)
45
+ "(display utt)
46
+ Display an utterance's waveform, F0 and segment labels in Xwaves.
47
+ Xwaves must be running on the current machine, with a labeller for
48
+ this to work."
49
+ (let ((tmpname (make_tmp_filename)))
50
+ (utt.save.wave utt (string-append tmpname ".wav"))
51
+ (utt.save.segs utt (string-append tmpname ".lab"))
52
+ (utt.save.f0 utt (string-append tmpname ".f0"))
53
+ (system (format nil "send_xwaves make file %s name %s height 150"
54
+ (string-append tmpname ".f0") tmpname))
55
+ (system (format nil "send_xwaves make name %s file %s height 200"
56
+ tmpname (string-append tmpname ".wav")))
57
+ (system (format nil "send_xwaves send make file %s name %s color 125"
58
+ (string-append tmpname ".lab") tmpname))
59
+ (system (format nil "send_xwaves send activate name %s fields 1"
60
+ tmpname))
61
+ (system (format nil "send_xwaves %s align file %s"
62
+ tmpname (string-append tmpname ".wav"))))
63
+ )
64
+
65
+ (provide 'display)
66
+
67
+
68
+
69
+
CosyVoice-ttsfrd/resource/festival/duration.scm ADDED
@@ -0,0 +1,196 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2
+ ;;; ;;
3
+ ;;; Centre for Speech Technology Research ;;
4
+ ;;; University of Edinburgh, UK ;;
5
+ ;;; Copyright (c) 1996,1997 ;;
6
+ ;;; All Rights Reserved. ;;
7
+ ;;; ;;
8
+ ;;; Permission is hereby granted, free of charge, to use and distribute ;;
9
+ ;;; this software and its documentation without restriction, including ;;
10
+ ;;; without limitation the rights to use, copy, modify, merge, publish, ;;
11
+ ;;; distribute, sublicense, and/or sell copies of this work, and to ;;
12
+ ;;; permit persons to whom this work is furnished to do so, subject to ;;
13
+ ;;; the following conditions: ;;
14
+ ;;; 1. The code must retain the above copyright notice, this list of ;;
15
+ ;;; conditions and the following disclaimer. ;;
16
+ ;;; 2. Any modifications must be clearly marked as such. ;;
17
+ ;;; 3. Original authors' names are not deleted. ;;
18
+ ;;; 4. The authors' names are not used to endorse or promote products ;;
19
+ ;;; derived from this software without specific prior written ;;
20
+ ;;; permission. ;;
21
+ ;;; ;;
22
+ ;;; THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK ;;
23
+ ;;; DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ;;
24
+ ;;; ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT ;;
25
+ ;;; SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE ;;
26
+ ;;; FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES ;;
27
+ ;;; WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN ;;
28
+ ;;; AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ;;
29
+ ;;; ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF ;;
30
+ ;;; THIS SOFTWARE. ;;
31
+ ;;; ;;
32
+ ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
33
+ ;;;
34
+ ;;; Basic Duration module which will call appropriate duration
35
+ ;;; (C++) modules based on set parameter
36
+ ;;;
37
+ ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
38
+
39
+ ;;; These modules should predict segment durations
40
+ ;;; based on information in the phrase and word streams
41
+
42
+ (define (Duration utt)
43
+ "(Duration utt)
44
+ Predict segmental durations using Duration_Method defined in Parameters.
45
+ Four methods are currently available: averages, Klatt rules, CART tree
46
+ based, and fixed duration."
47
+ (let ((rval (apply_method 'Duration_Method utt)))
48
+ (cond
49
+ (rval rval) ;; new style
50
+ ;; 1.1.1 voices still use other names
51
+ ((eq 'Averages (Parameter.get 'Duration_Method))
52
+ (Duration_Averages utt))
53
+ ((eq 'Klatt (Parameter.get 'Duration_Method))
54
+ (Duration_Klatt utt))
55
+ ((eq 'Tree_ZScores (Parameter.get 'Duration_Method))
56
+ (Duration_Tree_ZScores utt))
57
+ ((eq 'Tree (Parameter.get 'Duration_Method))
58
+ (Duration_Tree utt))
59
+ (t
60
+ (Duration_Default utt)))))
61
+
62
+ (define (Duration_LogZScores utt)
63
+ "(Duration_LogZScores utt)
64
+ Predicts duration to segments using the CART tree in duration_logzscore_tree
65
+ and duration_logzscore_tree_silence which produces a zscore of the log
66
+ duration. The variable duration_logzscore_ph_info contains (log) means
67
+ and std for each phone in the set."
68
+ (let ((silence (car (car (cdr (assoc 'silences (PhoneSet.description))))))
69
+ ldurinfo dur) ;; dur is bound here so the set! below does not create a global
70
+ (mapcar
71
+ (lambda (s)
72
+ (if (string-equal silence (item.name s)) ;; silences have their own tree
73
+ (set! ldurinfo
74
+ (wagon s duration_logzscore_tree_silence))
75
+ (set! ldurinfo
76
+ (wagon s duration_logzscore_tree)))
77
+ (set! dur (exp (duration_unzscore ;; zscore of log duration -> duration
78
+ (item.name s)
79
+ (car (last ldurinfo))
80
+ duration_logzscore_ph_info)))
81
+ (set! dur (* dur (duration_find_stretch s)))
82
+ (item.set_feat
83
+ s "end" (+ dur (item.feat s "start_segment"))))
84
+ (utt.relation.items utt 'Segment))
85
+ utt))
86
+
87
+ (define (duration_unzscore phname zscore table)
88
+ "(duration_unzscore phname zscore table)
89
+ Look up phname in table and convert zscore back to absolute domain."
90
+ (let ((phinfo (assoc phname table))
91
+ mean std)
92
+ (if phinfo
93
+ (begin
94
+ (set! mean (car (cdr phinfo)))
95
+ (set! std (car (cdr (cdr phinfo)))))
96
+ (begin ;; phname not in table: warn and fall back to fixed values
97
+ (format t "Duration: unzscore no info for %s\n" phname)
98
+ (set! mean 0.100)
99
+ (set! std 0.25)))
100
+ (+ mean (* zscore std))))
101
+
102
+ (define (duration_find_stretch seg)
103
+ "(duration_find_stretch seg)
104
+ Find any relevant duration stretch: the global Duration_Stretch parameter times the token-level dur_stretch feature of seg."
105
+ (let ((global (Parameter.get 'Duration_Stretch))
106
+ (local (item.feat
107
+ seg "R:SylStructure.parent.parent.R:Token.parent.dur_stretch")))
108
+ (if (or (not global) ;; unset or zero global stretch means no stretch
109
+ (equal? global 0.0))
110
+ (set! global 1.0))
111
+ (if (string-equal local 0.0) ;; zero local stretch means no stretch
112
+ (set! local 1.0))
113
+ (* global local)))
114
+
115
+ ;; These provide lisp level functions, some of which have
116
+ ;; been converted in C++ (in festival/src/modules/base/ff.cc)
117
+ (define (onset_has_ctype seg type)
118
+ ;; "1" if onset contains ctype
119
+ (let ((syl (item.relation.parent seg 'SylStructure)))
120
+ (if (not syl)
121
+ "0" ;; a silence
122
+ (let ((segs (item.relation.daughters syl 'SylStructure))
123
+ (v "0"))
124
+ (while (and segs
125
+ (not (string-equal
126
+ "+"
127
+ (item.feat (car segs) "ph_vc"))))
128
+ (if (string-equal
129
+ type
130
+ (item.feat (car segs) "ph_ctype"))
131
+ (set! v "1"))
132
+ (set! segs (cdr segs)))
133
+ v))))
134
+
135
+ (define (coda_has_ctype seg type)
136
+ ;; "1" if coda contains ctype
137
+ (let ((syl (item.relation.parent seg 'SylStructure)))
138
+ (if (not syl)
139
+ "0" ;; a silence
140
+ (let ((segs (reverse (item.relation.daughters
141
+ syl 'SylStructure)))
142
+ (v "0"))
143
+ (while (and segs
144
+ (not (string-equal
145
+ "+"
146
+ (item.feat (car segs) "ph_vc"))))
147
+ (if (string-equal
148
+ type
149
+ (item.feat (car segs) "ph_ctype"))
150
+ (set! v "1"))
151
+ (set! segs (cdr segs)))
152
+ v))))
153
+
154
+ (define (onset_stop seg)
155
+ (onset_has_ctype seg "s"))
156
+ (define (onset_fric seg)
157
+ (onset_has_ctype seg "f"))
158
+ (define (onset_nasal seg)
159
+ (onset_has_ctype seg "n"))
160
+ (define (onset_glide seg)
161
+ (let ((l (onset_has_ctype seg "l")))
162
+ (if (string-equal l "0")
163
+ (onset_has_ctype seg "r")
164
+ "1")))
165
+ (define (coda_stop seg)
166
+ (coda_has_ctype seg "s"))
167
+ (define (coda_fric seg)
168
+ (coda_has_ctype seg "f"))
169
+ (define (coda_nasal seg)
170
+ (coda_has_ctype seg "n"))
171
+ (define (coda_glide seg)
172
+ (let ((l (coda_has_ctype seg "l")))
173
+ (if (string-equal l "0")
174
+ (coda_has_ctype seg "r")
175
+ "1")))
176
+
177
+ (define (Unisyn_Duration utt)
178
+ "(Unisyn_Duration utt)
179
+ predicts Segment durations with the CART tree in duration_cart_tree, scaled by
180
+ Duration_Stretch, but holds the result in a way necessary for other Unisyn code."
181
+ (let ((end 0)) ;; running end time of the segments processed so far
182
+ (mapcar
183
+ (lambda (s)
184
+ (item.get_utt s)
185
+ (let ((dur (wagon_predict s duration_cart_tree)))
186
+ (set! dur (* (or (Parameter.get 'Duration_Stretch) 1.0) dur)) ;; unset parameter (nil) means no stretch
187
+ (set! end (+ dur end))
188
+ (item.set_feat s "target_dur" dur)
189
+ (item.set_function s "start" "unisyn_start")
190
+ (item.set_feat s "end" end)
191
+ (item.set_feat s "dur" dur)
192
+ ))
193
+ (utt.relation.items utt 'Segment))
194
+ utt))
195
+
196
+ (provide 'duration)
CosyVoice-ttsfrd/resource/festival/email-mode.scm ADDED
@@ -0,0 +1,89 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2
+ ;;; ;;
3
+ ;;; Centre for Speech Technology Research ;;
4
+ ;;; University of Edinburgh, UK ;;
5
+ ;;; Copyright (c) 1996,1997 ;;
6
+ ;;; All Rights Reserved. ;;
7
+ ;;; ;;
8
+ ;;; Permission is hereby granted, free of charge, to use and distribute ;;
9
+ ;;; this software and its documentation without restriction, including ;;
10
+ ;;; without limitation the rights to use, copy, modify, merge, publish, ;;
11
+ ;;; distribute, sublicense, and/or sell copies of this work, and to ;;
12
+ ;;; permit persons to whom this work is furnished to do so, subject to ;;
13
+ ;;; the following conditions: ;;
14
+ ;;; 1. The code must retain the above copyright notice, this list of ;;
15
+ ;;; conditions and the following disclaimer. ;;
16
+ ;;; 2. Any modifications must be clearly marked as such. ;;
17
+ ;;; 3. Original authors' names are not deleted. ;;
18
+ ;;; 4. The authors' names are not used to endorse or promote products ;;
19
+ ;;; derived from this software without specific prior written ;;
20
+ ;;; permission. ;;
21
+ ;;; ;;
22
+ ;;; THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK ;;
23
+ ;;; DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ;;
24
+ ;;; ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT ;;
25
+ ;;; SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE ;;
26
+ ;;; FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES ;;
27
+ ;;; WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN ;;
28
+ ;;; AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ;;
29
+ ;;; ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF ;;
30
+ ;;; THIS SOFTWARE. ;;
31
+ ;;; ;;
32
+ ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
33
+ ;;;
34
+ ;;; An example tts text mode for reading email messages, this includes
35
+ ;;; support for extracting the interesting headers from the message
36
+ ;;; and for dealing with quoted text. It's all very primitive and
37
+ ;;; will easily be confused but it's here just as an example
38
+ ;;;
39
+
40
+ (define (email_init_func)
41
+ "(email_init_func)
42
+ Called on starting email text mode."
43
+ (voice_rab_diphone) ;; select the voice used outside quoted text
44
+ (set! email_previous_t2w_func token_to_words) ;; remember the current token_to_words so it can be delegated to and restored
45
+ (set! english_token_to_words email_token_to_words) ;; install the email-specific rules
46
+ (set! token_to_words english_token_to_words)
47
+ (set! email_in_quote nil)) ;; reset the quote flag (not read elsewhere in this file)
48
+
49
+ (define (email_exit_func)
50
+ "(email_exit_func)
51
+ Called on exit email text mode."
52
+ (set! english_token_to_words email_previous_t2w_func) ;; restore the function saved by email_init_func
53
+ (set! token_to_words english_token_to_words))
54
+
55
+ (define (email_token_to_words token name)
56
+ "(email_token_to_words token name)
57
+ Email specific token to word rules."
58
+ (cond
59
+ ((string-matches name "<.*@.*>") ;; case 1: name looks like an address in angle brackets
60
+ (append
61
+ (email_previous_t2w_func token ;; words for the text between "<" and "@"
62
+ (string-after (string-before name "@") "<"))
63
+ (cons
64
+ "at" ;; the "@" itself is spoken as the word "at"
65
+ (email_previous_t2w_func token ;; words for the text between "@" and ">"
66
+ (string-before (string-after name "@") ">")))))
67
+ ((and (string-matches name ">") ;; case 2: a lone ">" token ...
68
+ (string-matches (item.feat token "whitespace") ;; ... whose preceding whitespace contains a newline
69
+ "[ \t\n]*\n *"))
70
+ (voice_cmu_us_awb_cg) ;; switch voice for the quoted text
71
+ nil ;; return nothing to say
72
+ )
73
+ (t ;; for all other cases
74
+ (if (string-matches (item.feat token "whitespace") ;; whitespace before the token contains a newline
75
+ ".*\n[ \n]*")
76
+ (voice_rab_diphone)) ;; switch back to the voice selected in email_init_func
77
+ (email_previous_t2w_func token name)))) ;; delegate to the rules saved at mode start
78
+
79
+ (set! tts_text_modes ;; prepend the email mode entry to the existing list of text modes
80
+ (cons
81
+ (list
82
+ 'email ;; mode name
83
+ (list ;; email mode params
84
+ (list 'init_func email_init_func) ;; function value captured at load time
85
+ (list 'exit_func email_exit_func) ;; function value captured at load time
86
+ '(filter "email_filter"))) ;; filter named by string; NOTE(review): presumably an external program - confirm
87
+ tts_text_modes))
88
+
89
+ (provide 'email-mode)
CosyVoice-ttsfrd/resource/festival/engmorph.scm ADDED
@@ -0,0 +1,151 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2
+ ;;; ;;
3
+ ;;; Centre for Speech Technology Research ;;
4
+ ;;; University of Edinburgh, UK ;;
5
+ ;;; Copyright (c) 1997 ;;
6
+ ;;; All Rights Reserved. ;;
7
+ ;;; ;;
8
+ ;;; Permission is hereby granted, free of charge, to use and distribute ;;
9
+ ;;; this software and its documentation without restriction, including ;;
10
+ ;;; without limitation the rights to use, copy, modify, merge, publish, ;;
11
+ ;;; distribute, sublicense, and/or sell copies of this work, and to ;;
12
+ ;;; permit persons to whom this work is furnished to do so, subject to ;;
13
+ ;;; the following conditions: ;;
14
+ ;;; 1. The code must retain the above copyright notice, this list of ;;
15
+ ;;; conditions and the following disclaimer. ;;
16
+ ;;; 2. Any modifications must be clearly marked as such. ;;
17
+ ;;; 3. Original authors' names are not deleted. ;;
18
+ ;;; 4. The authors' names are not used to endorse or promote products ;;
19
+ ;;; derived from this software without specific prior written ;;
20
+ ;;; permission. ;;
21
+ ;;; ;;
22
+ ;;; THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK ;;
23
+ ;;; DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ;;
24
+ ;;; ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT ;;
25
+ ;;; SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE ;;
26
+ ;;; FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES ;;
27
+ ;;; WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN ;;
28
+ ;;; AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ;;
29
+ ;;; ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF ;;
30
+ ;;; THIS SOFTWARE. ;;
31
+ ;;; ;;
32
+ ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
33
+ ;;; Author: Alan W Black
34
+ ;;; Date: December 1997
35
+ ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
36
+ ;;;
37
+ ;;; THIS IS EXPERIMENTAL AND DOES *NOT* WORK
38
+ ;;;
39
+ ;;; Koskenniemi-style context rewrite rules for English Morphographemics
40
+ ;;; Basically splits words into their (potential) morphemes.
41
+ ;;;
42
+ ;;; Based (roughly) on the rules in "Computational Morphology"
43
+ ;;; Ritchie et al. MIT Press 1992.
44
+ ;;;
45
+ ;;; This is not a Scheme file and can't be loaded and evaluated
46
+ ;;; It is designed for use with the wfst tools in the speech tools
47
+ ;;; e.g. wfst_build -type kk -o engmorph.wfst -detmin engmorph.scm
48
+ ;;;
49
+
50
+ (KKrules
51
+ engmorph
52
+ (Alphabets
53
+ ;; Input Alphabet
54
+ (a b c d e f g h i j k l m n o p q r s t u v w x y z #)
55
+ ;; Output Alphabet
56
+ (a b c d e f g h i j k l m n o p q r s t u v w x y z + #)
57
+ )
58
+ (Sets
59
+ (LET a b c d e f g h i j k l m n o p q r s t u v w x y z)
60
+ )
61
+ (Rules
62
+ ;; The basic rules
63
+ ( a => nil --- nil)
64
+ ( b => nil --- nil)
65
+ ( c => nil --- nil)
66
+ ( d => nil --- nil)
67
+ ( e => nil --- nil)
68
+ ( f => nil --- nil)
69
+ ( g => nil --- nil)
70
+ ( h => nil --- nil)
71
+ ( i => nil --- nil)
72
+ ( j => nil --- nil)
73
+ ( k => nil --- nil)
74
+ ( l => nil --- nil)
75
+ ( m => nil --- nil)
76
+ ( n => nil --- nil)
77
+ ( o => nil --- nil)
78
+ ( p => nil --- nil)
79
+ ( q => nil --- nil)
80
+ ( r => nil --- nil)
81
+ ( s => nil --- nil)
82
+ ( t => nil --- nil)
83
+ ( u => nil --- nil)
84
+ ( v => nil --- nil)
85
+ ( w => nil --- nil)
86
+ ( x => nil --- nil)
87
+ ( y => nil --- nil)
88
+ ( z => nil --- nil)
89
+ ( # => nil --- nil)
90
+ ; ( _epsilon_/+ => (or LET _epsilon_/e ) --- (LET))
91
+ ( _epsilon_/+ => (or LET _epsilon_/e) --- nil)
92
+
93
+ ;; The rules that do interesting things
94
+
95
+ ;; Epenthesis
96
+ ;; churches -> church+s
97
+ ;; boxes -> box+s
98
+ (e/+ <=> (or (s h) (or s x z) (i/y) (c h))
99
+ ---
100
+ (s))
101
+ ;; Gemination
102
+ (b/+ <=> ( (or b c d f g h j k l m n p q r s t v w z) (or a e i o u y) b )
103
+ ---
104
+ ((or a e i o u)))
105
+ (d/+ <=> ((or b c d f g h j k l m n p q r s t v w z) (or a e i o u y) d )
106
+ ---
107
+ ((or a e i o u)))
108
+ (f/+ <=> ((or b c d f g h j k l m n p q r s t v w z) (or a e i o u y) f )
109
+ ---
110
+ ((or a e i o u)))
111
+ (g/+ <=> ((or b c d f g h j k l m n p q r s t v w z) (or a e i o u y) g )
112
+ ---
113
+ ((or a e i o u)))
114
+ (m/+ <=> ((or b c d f g h j k l m n p q r s t v w z) (or a e i o u y) m )
115
+ ---
116
+ ((or a e i o u)))
117
+ (p/+ <=> ((or b c d f g h j k l m n p q r s t v w z) (or a e i o u y) p )
118
+ ---
119
+ ((or a e i o u)))
120
+ (s/+ <=> ((or b c d f g h j k l m n p q r s t v w z) (or a e i o u y) s )
121
+ ---
122
+ ((or a e i o u)))
123
+ (t/+ <=> ((or b c d f g h j k l m n p q r s t v w z) (or a e i o u y) t )
124
+ ---
125
+ ((or a e i o u)))
126
+ (z/+ <=> ((or b c d f g h j k l m n p q r s t v w z) (or a e i o u y) z )
127
+ ---
128
+ ((or a e i o u)))
129
+ (n/+ <=> ((or b c d f g h j k l m n p q r s t v w z) (or a e i o u y) n )
130
+ ---
131
+ ((or a e i o u)))
132
+ (l/+ <=> ((or b c d f g h j k l m n p q r s t v w z) (or a e i o u y) l )
133
+ ---
134
+ ((or a e i o u)))
135
+ (r/+ <=> ((or b c d f g h j k l m n p q r s t v w z) (or a e i o u y) r )
136
+ ---
137
+ ((or a e i o u)))
138
+ ;; tries->try+s
139
+ ( i/y <=> ((or b c d f g h j k l m n p q r s t v w x z))
140
+ ---
141
+ ((or ( e/+ s )
142
+ ( _epsilon_/+ (or a d e f h i l m n o p s w y)))))
143
+ ;; Elision
144
+ ;; moved -> move+ed
145
+ (_epsilon_/e <=>
146
+ ((or a e i o u ) (or b c d f g j k l m n p q r s t v x z))
147
+ ---
148
+ ( _epsilon_/+ (or a e i o u )))
149
+
150
+ )
151
+ )
CosyVoice-ttsfrd/resource/festival/engmorphsyn.scm ADDED
@@ -0,0 +1,170 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2
+ ;;; ;;
3
+ ;;; Centre for Speech Technology Research ;;
4
+ ;;; University of Edinburgh, UK ;;
5
+ ;;; Copyright (c) 1997 ;;
6
+ ;;; All Rights Reserved. ;;
7
+ ;;; ;;
8
+ ;;; Permission is hereby granted, free of charge, to use and distribute ;;
9
+ ;;; this software and its documentation without restriction, including ;;
10
+ ;;; without limitation the rights to use, copy, modify, merge, publish, ;;
11
+ ;;; distribute, sublicense, and/or sell copies of this work, and to ;;
12
+ ;;; permit persons to whom this work is furnished to do so, subject to ;;
13
+ ;;; the following conditions: ;;
14
+ ;;; 1. The code must retain the above copyright notice, this list of ;;
15
+ ;;; conditions and the following disclaimer. ;;
16
+ ;;; 2. Any modifications must be clearly marked as such. ;;
17
+ ;;; 3. Original authors' names are not deleted. ;;
18
+ ;;; 4. The authors' names are not used to endorse or promote products ;;
19
+ ;;; derived from this software without specific prior written ;;
20
+ ;;; permission. ;;
21
+ ;;; ;;
22
+ ;;; THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK ;;
23
+ ;;; DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ;;
24
+ ;;; ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT ;;
25
+ ;;; SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE ;;
26
+ ;;; FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES ;;
27
+ ;;; WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN ;;
28
+ ;;; AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ;;
29
+ ;;; ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF ;;
30
+ ;;; THIS SOFTWARE. ;;
31
+ ;;; ;;
32
+ ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
33
+ ;;; Author: Alan W Black
34
+ ;;; Date: December 1997
35
+ ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
36
+ ;;;
37
+ ;;; THIS IS EXPERIMENTAL AND DOES *NOT* WORK
38
+ ;;;
39
+ ;;;
40
+ ;;; An English morpho-syntax finite-state grammar
41
+ ;;; This is used for morphological decomposition of unknown words
42
+ ;;; specifically (only) words that are not found in the lexicon.
43
+ ;;; This idea is that when an unknown word is found an attempt is made
44
+ ;;; to see if it contains any well known morphological inflections or
45
+ ;;; derivations, if so a better use of LTS can be made on the root, of
46
+ ;;; none are found this
47
+ ;;;
48
+ ;;;
49
+ ;;; Based on "Analysis of Unknown Words through Morphological
50
+ ;;; Decomposition", Black, van de Plassche, Willians, European ACL 91.
51
+ ;;; with the anyword matcher from a question by Lauri Karttunen after
52
+ ;;; the talk.
53
+ ;;;
54
+ ;;; The suffixes and finite-state morph-syntax grammar is based
55
+ ;;; (very roughly) on the rules in "Computational Morphology"
56
+ ;;; Ritchie et al. MIT Press 1992.
57
+ ;;;
58
+ ;;; Can be compiled with
59
+ ;;; wfst_build -type rg -o engmorphsyn.wfst -detmin engmorphsyn.scm
60
+ ;;;
61
+ ;;; The result can be combined with the morphographemic rules
62
+ ;;; with
63
+ ;;; wfst_build -type compose engmorph.wfst engmorphsyn.wfst -detmin -o engstemmer.wfst
64
+ ;;;
65
+ ;;; echo "# b o x e/+ s #" | wfst_run -wfst engstemmer.wfst -recog
66
+ ;;; state 0 #/# -> 1
67
+ ;;; state 1 b/b -> 3
68
+ ;;; state 3 o/o -> 17
69
+ ;;; state 17 x/x -> 14
70
+ ;;; state 14 e/+ -> 36
71
+ ;;; state 36 s/s -> 34
72
+ ;;; state 34 #/# -> 16
73
+ ;;; OK.
74
+ ;;; echo "# b o x e s #" | wfst_run -wfst engstemmer.wfst -recog
75
+ ;;; state 0 #/# -> 1
76
+ ;;; state 1 b/b -> 3
77
+ ;;; state 3 o/o -> 17
78
+ ;;; state 17 x/x -> 14
79
+ ;;; state 14 e/e -> 22
80
+ ;;; state 22 s/s -> -1
81
+
82
+ (RegularGrammar
83
+ engsuffixmorphosyntax
84
+ ;; Sets
85
+ (
86
+ (V a e i o u y)
87
+ (C b c d f g h j k l m n p q r s t v w x y z)
88
+ )
89
+ ;; Rules
90
+
91
+ (
92
+ ;; A word *must* have a suffix to be recognized
93
+ (Word -> # Syls Suffix )
94
+ (Word -> # Syls End )
95
+
96
+ ;; This matches any string of characters that contains at least one vowel
97
+ (Syls -> Syl Syls )
98
+ (Syls -> Syl )
99
+ (Syl -> Cs V Cs )
100
+ (Cs -> C Cs )
101
+ (Cs -> )
102
+
103
+ (Suffix -> VerbSuffix )
104
+ (Suffix -> NounSuffix )
105
+ (Suffix -> AdjSuffix )
106
+ (VerbSuffix -> VerbFinal End )
107
+ (VerbSuffix -> VerbtoNoun NounSuffix )
108
+ (VerbSuffix -> VerbtoNoun End )
109
+ (VerbSuffix -> VerbtoAdj AdjSuffix )
110
+ (VerbSuffix -> VerbtoAdj End )
111
+ (NounSuffix -> NounFinal End )
112
+ (NounSuffix -> NountoNoun NounSuffix )
113
+ (NounSuffix -> NountoNoun End )
114
+ (NounSuffix -> NountoAdj AdjSuffix )
115
+ (NounSuffix -> NountoAdj End )
116
+ (NounSuffix -> NountoVerb VerbSuffix )
117
+ (NounSuffix -> NountoVerb End )
118
+ (AdjSuffix -> AdjFinal End )
119
+ (AdjSuffix -> AdjtoAdj AdjSuffix)
120
+ (AdjSuffix -> AdjtoAdj End)
121
+ (AdjSuffix -> AdjtoAdv End) ;; isn't any Adv to anything
122
+
123
+ (End -> # ) ;; word boundary symbol *always* present
124
+
125
+ (VerbFinal -> + e d)
126
+ (VerbFinal -> + i n g)
127
+ (VerbFinal -> + s)
128
+
129
+ (VerbtoNoun -> + e r)
130
+ (VerbtoNoun -> + e s s)
131
+ (VerbtoNoun -> + a t i o n)
132
+ (VerbtoNoun -> + i n g)
133
+ (VerbtoNoun -> + m e n t)
134
+
135
+ (VerbtoAdj -> + a b l e)
136
+
137
+ (NounFinal -> + s)
138
+
139
+ (NountoNoun -> + i s m)
140
+ (NountoNoun -> + i s t)
141
+ (NountoNoun -> + s h i p)
142
+
143
+ (NountoAdj -> + l i k e)
144
+ (NountoAdj -> + l e s s)
145
+ (NountoAdj -> + i s h)
146
+ (NountoAdj -> + o u s)
147
+
148
+ (NountoVerb -> + i f y)
149
+ (NountoVerb -> + i s e)
150
+ (NountoVerb -> + i z e)
151
+
152
+ (AdjFinal -> + e r)
153
+ (AdjFinal -> + e s t)
154
+
155
+ (AdjtoAdj -> + i s h)
156
+ (AdjtoAdv -> + l y)
157
+ (AdjtoNoun -> + n e s s)
158
+ (AdjtoVerb -> + i s e)
159
+ (AdjtoVerb -> + i z e)
160
+
161
+ )
162
+ )
163
+
164
+
165
+
166
+
167
+
168
+
169
+
170
+
CosyVoice-ttsfrd/resource/festival/f2bdurtreeZ.scm ADDED
@@ -0,0 +1,869 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2
+ ;;; ;;
3
+ ;;; Centre for Speech Technology Research ;;
4
+ ;;; University of Edinburgh, UK ;;
5
+ ;;; Copyright (c) 1996 ;;
6
+ ;;; All Rights Reserved. ;;
7
+ ;;; ;;
8
+ ;;; Permission is hereby granted, free of charge, to use and distribute ;;
9
+ ;;; this software and its documentation without restriction, including ;;
10
+ ;;; without limitation the rights to use, copy, modify, merge, publish, ;;
11
+ ;;; distribute, sublicense, and/or sell copies of this work, and to ;;
12
+ ;;; permit persons to whom this work is furnished to do so, subject to ;;
13
+ ;;; the following conditions: ;;
14
+ ;;; 1. The code must retain the above copyright notice, this list of ;;
15
+ ;;; conditions and the following disclaimer. ;;
16
+ ;;; 2. Any modifications must be clearly marked as such. ;;
17
+ ;;; 3. Original authors' names are not deleted. ;;
18
+ ;;; 4. The authors' names are not used to endorse or promote products ;;
19
+ ;;; derived from this software without specific prior written ;;
20
+ ;;; permission. ;;
21
+ ;;; ;;
22
+ ;;; THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK ;;
23
+ ;;; DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ;;
24
+ ;;; ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT ;;
25
+ ;;; SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE ;;
26
+ ;;; FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES ;;
27
+ ;;; WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN ;;
28
+ ;;; AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ;;
29
+ ;;; ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF ;;
30
+ ;;; THIS SOFTWARE. ;;
31
+ ;;; ;;
32
+ ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
33
+ ;;;
34
+ ;;; First attempt at a tree to learn durations. Although
35
+ ;;; it was trained from F2B and the radio phone set should
36
+ ;;; work for others that are decalred with the same phone
37
+ ;;; features
38
+ ;;;
39
+
40
+ ;; in ancient items (not on independent data)
41
+ ;; RMSE 0.821086 Correlation is 0.573693 Mean (abs) Error 0.612327 (0.547034)
42
+
43
+ ;; on independent test data
44
+ ;; RMSE 0.8054 Correlation is 0.5327 Mean (abs) Error 0.6073 (0.5290)
45
+
46
+ (set! f2b_duration_cart_tree
47
+ '
48
+ ((name is #)
49
+ ((emph_sil is +)
50
+ ((0.0 -0.5))
51
+ ((R:Segment.p.R:SylStructure.parent.parent.pbreak is BB)
52
+ ((0.0 2.0))
53
+ ((0.0 0.0))))
54
+ ((R:SylStructure.parent.accented is 0)
55
+ ((R:Segment.p.ph_ctype is 0)
56
+ ((R:Segment.n.ph_cplace is 0)
57
+ ((ph_ctype is n)
58
+ ((R:SylStructure.parent.position_type is initial)
59
+ ((ph_cplace is a)
60
+ ((0.675606 -0.068741))
61
+ ((0.674321 0.204279)))
62
+ ((ph_cplace is l)
63
+ ((0.688993 -0.124997))
64
+ ((R:SylStructure.parent.syl_out < 10)
65
+ ((0.610881 -0.394451))
66
+ ((0.664504 -0.603196)))))
67
+ ((ph_ctype is r)
68
+ ((lisp_onset_glide is 0)
69
+ ((R:SylStructure.parent.R:Syllable.n.syl_break is 0)
70
+ ((0.949991 0.619256))
71
+ ((1.05066 0.979668)))
72
+ ((0.858728 0.457972)))
73
+ ((R:SylStructure.parent.position_type is single)
74
+ ((syl_initial is 0)
75
+ ((ph_ctype is s)
76
+ ((0.692981 -0.788933))
77
+ ((0.834878 -0.116988)))
78
+ ((R:SylStructure.parent.syl_out < 9.4)
79
+ ((0.777932 0.357818))
80
+ ((0.852909 0.115478))))
81
+ ((R:Segment.n.ph_vrnd is +)
82
+ ((ph_ctype is s)
83
+ ((0.81305 0.87399))
84
+ ((0.65978 0.418928)))
85
+ ((R:SylStructure.parent.position_type is final)
86
+ ((R:SylStructure.parent.parent.word_numsyls < 2.3)
87
+ ((0.71613 -0.2888))
88
+ ((0.642029 0.0624649)))
89
+ ((R:Segment.nn.ph_cplace is a)
90
+ ((R:SylStructure.parent.R:Syllable.n.syl_break is 1)
91
+ ((R:SylStructure.parent.R:Syllable.nn.syl_break is 1)
92
+ ((R:SylStructure.parent.position_type is initial)
93
+ ((0.854092 0.384456))
94
+ ((0.769274 0.10705)))
95
+ ((lisp_coda_stop is 0)
96
+ ((0.571763 0.0755348))
97
+ ((0.632928 -0.11117))))
98
+ ((lisp_coda_stop is 0)
99
+ ((R:SylStructure.parent.syl_out < 8.6)
100
+ ((0.555092 0.30006))
101
+ ((0.552673 -0.0263481)))
102
+ ((0.903186 0.519185))))
103
+ ((R:Segment.nn.ph_cplace is p)
104
+ ((0.563915 0.204967))
105
+ ((R:Segment.nn.ph_cvox is -)
106
+ ((ph_ctype is s)
107
+ ((0.67653 0.227681))
108
+ ((0.550623 0.435079)))
109
+ ((R:SylStructure.parent.position_type is initial)
110
+ ((0.93428 0.732003))
111
+ ((0.84114 0.423214)))))))))))
112
+ ((R:Segment.n.ph_ctype is s)
113
+ ((ph_ctype is s)
114
+ ((0.693376 -1.02719))
115
+ ((R:Segment.n.ph_cplace is v)
116
+ ((ph_ctype is r)
117
+ ((0.539799 -0.344524))
118
+ ((0.858576 0.154275)))
119
+ ((R:SylStructure.parent.R:Syllable.n.syl_onsetsize < 1.2)
120
+ ((lisp_onset_glide is 0)
121
+ ((R:SylStructure.parent.R:Syllable.n.syl_break is 1)
122
+ ((ph_ctype is n)
123
+ ((R:Segment.nn.ph_cplace is a)
124
+ ((0.64604 -0.643797))
125
+ ((0.739746 -0.450649)))
126
+ ((ph_ctype is f)
127
+ ((0.657043 -0.462107))
128
+ ((0.798438 -0.19569))))
129
+ ((R:SylStructure.parent.syl_out < 8.4)
130
+ ((lisp_coda_stop is 0)
131
+ ((0.766789 -0.0484781))
132
+ ((0.717203 -0.322113)))
133
+ ((R:SylStructure.parent.position_type is single)
134
+ ((0.508168 -0.412874))
135
+ ((0.703458 -0.291121)))))
136
+ ((0.574827 -0.65022)))
137
+ ((0.801765 -0.120813)))))
138
+ ((ph_ctype is n)
139
+ ((R:Segment.n.ph_ctype is f)
140
+ ((R:Segment.n.ph_cplace is b)
141
+ ((0.797652 0.623764))
142
+ ((R:Segment.n.ph_cplace is a)
143
+ ((R:Segment.n.seg_onsetcoda is coda)
144
+ ((0.675567 0.288251))
145
+ ((0.854197 0.626272)))
146
+ ((R:SylStructure.parent.R:Syllable.nn.syl_break is 1)
147
+ ((0.660394 -0.225466))
148
+ ((0.65275 0.0487195)))))
149
+ ((R:Segment.n.ph_ctype is n)
150
+ ((0.685613 -0.512227))
151
+ ((0.736366 -0.104066))))
152
+ ((R:Segment.n.ph_ctype is r)
153
+ ((R:SylStructure.parent.position_type is initial)
154
+ ((R:SylStructure.parent.R:Syllable.n.syl_onsetsize < 0.1)
155
+ ((0.98185 0.152471))
156
+ ((0.851907 0.788208)))
157
+ ((ph_ctype is f)
158
+ ((0.76106 0.406474))
159
+ ((R:Segment.n.ph_cplace is a)
160
+ ((1.01348 -0.0422549))
161
+ ((0.786777 -0.714839)))))
162
+ ((ph_cplace is b)
163
+ ((R:SylStructure.parent.syl_out < 10.4)
164
+ ((0.799025 0.0992277))
165
+ ((0.851068 -0.115896)))
166
+ ((R:Segment.n.ph_cplace is p)
167
+ ((0.669855 -0.655488))
168
+ ((ph_ctype is r)
169
+ ((R:Segment.n.ph_cplace is a)
170
+ ((1.00772 0.130892))
171
+ ((0.635981 -0.35826)))
172
+ ((R:Segment.n.ph_ctype is l)
173
+ ((R:SylStructure.parent.R:Syllable.n.syl_break is 1)
174
+ ((0.746089 -0.286007))
175
+ ((0.89158 0.154432)))
176
+ ((R:Segment.n.ph_cplace is b)
177
+ ((1.04971 -0.0449782))
178
+ ((R:SylStructure.parent.syl_out < 9.8)
179
+ ((R:Segment.n.ph_ctype is f)
180
+ ((R:Segment.n.seg_onsetcoda is coda)
181
+ ((1.4144 0.143658))
182
+ ((0.781116 -0.281483)))
183
+ ((ph_vlng is 0)
184
+ ((0.755959 -0.33462))
185
+ ((0.81024 -0.615287))))
186
+ ((R:SylStructure.parent.R:Syllable.n.syl_onsetsize < 0.3)
187
+ ((0.7426 -0.24342))
188
+ ((R:Segment.n.ph_ctype is f)
189
+ ((R:Segment.n.ph_cplace is a)
190
+ ((R:SylStructure.parent.position_type is single)
191
+ ((0.578639 -0.322097))
192
+ ((0.55826 -0.663238)))
193
+ ((0.616575 -0.713688)))
194
+ ((0.759572 -0.314116))))))))))))))
195
+ ((R:Segment.n.ph_ctype is f)
196
+ ((ph_ctype is 0)
197
+ ((R:Segment.p.ph_ctype is r)
198
+ ((R:SylStructure.parent.parent.word_numsyls < 2.2)
199
+ ((R:SylStructure.parent.R:Syllable.nn.syl_break is 1)
200
+ ((0.733193 -0.180968))
201
+ ((0.563111 -0.467934)))
202
+ ((0.426244 -0.758137)))
203
+ ((ph_vlng is a)
204
+ ((R:Segment.n.ph_cplace is b)
205
+ ((R:Segment.nn.ph_cvox is +)
206
+ ((0.680234 0.059855))
207
+ ((R:SylStructure.parent.position_type is single)
208
+ ((0.980851 0.443893))
209
+ ((0.715307 0.112865))))
210
+ ((R:Segment.p.ph_cplace is a)
211
+ ((0.851224 0.695863))
212
+ ((R:Segment.nn.ph_cvox is -)
213
+ ((0.75892 0.195772))
214
+ ((0.630633 0.478738)))))
215
+ ((R:Segment.n.seg_onsetcoda is coda)
216
+ ((R:Segment.n.ph_cplace is b)
217
+ ((R:Segment.nn.ph_cplace is 0)
218
+ ((0.815979 -0.477579))
219
+ ((0.851491 -0.168622)))
220
+ ((R:SylStructure.parent.position_type is single)
221
+ ((R:Segment.nn.ph_cvox is +)
222
+ ((1.14265 0.717697))
223
+ ((0.814726 0.291482)))
224
+ ((R:SylStructure.parent.R:Syllable.n.syl_break is 0)
225
+ ((0.512322 -0.0749096))
226
+ ((0.488216 0.112774)))))
227
+ ((R:SylStructure.parent.position_type is final)
228
+ ((0.693071 -0.200708))
229
+ ((R:Segment.p.ph_cvox is +)
230
+ ((0.489147 -0.378728))
231
+ ((0.695396 -0.525028)))))))
232
+ ((ph_vlng is s)
233
+ ((0.464234 -0.162706))
234
+ ((R:Segment.p.ph_cvox is +)
235
+ ((R:SylStructure.parent.parent.word_numsyls < 2.2)
236
+ ((0.566845 -0.616918))
237
+ ((0.92747 -0.26777)))
238
+ ((0.632833 -0.858295)))))
239
+ ((R:Segment.n.ph_vrnd is 0)
240
+ ((R:Segment.p.ph_ctype is r)
241
+ ((ph_vlng is 0)
242
+ ((0.845308 -0.23426))
243
+ ((R:SylStructure.parent.syl_out < 4.8)
244
+ ((R:Segment.n.ph_ctype is n)
245
+ ((0.484602 -0.850587))
246
+ ((0.535398 -0.586652)))
247
+ ((R:SylStructure.parent.R:Syllable.n.syl_onsetsize < 0.3)
248
+ ((ph_vlng is a)
249
+ ((0.368898 -0.799533))
250
+ ((lisp_coda_stop is 0)
251
+ ((0.387923 -1.11431))
252
+ ((0.407377 -0.859849))))
253
+ ((R:Segment.n.ph_cplace is a)
254
+ ((ph_vlng is a)
255
+ ((0.382367 -0.787669))
256
+ ((0.522121 -0.687376)))
257
+ ((0.361185 -0.853639))))))
258
+ ((ph_vlng is a)
259
+ ((ph_ctype is 0)
260
+ ((R:Segment.n.ph_ctype is s)
261
+ ((R:Segment.p.ph_cvox is +)
262
+ ((R:Segment.p.ph_cplace is d)
263
+ ((0.502849 -0.232866))
264
+ ((R:SylStructure.parent.position_type is initial)
265
+ ((0.641714 -0.0545426))
266
+ ((R:SylStructure.parent.parent.word_numsyls < 2.6)
267
+ ((0.613913 0.373746))
268
+ ((R:Segment.n.ph_cplace is v)
269
+ ((0.581158 0.310101))
270
+ ((0.628758 -0.068165))))))
271
+ ((R:SylStructure.parent.position_type is mid)
272
+ ((0.459281 -0.553794))
273
+ ((0.728208 -0.138806))))
274
+ ((R:Segment.p.ph_cplace is v)
275
+ ((0.32179 -0.728364))
276
+ ((R:Segment.p.ph_cplace is l)
277
+ ((0.562971 -0.550272))
278
+ ((R:SylStructure.parent.position_type is initial)
279
+ ((0.937298 -0.0246324))
280
+ ((R:Segment.p.ph_cvox is +)
281
+ ((R:Segment.n.ph_ctype is n)
282
+ ((R:Segment.n.ph_cplace is a)
283
+ ((R:SylStructure.parent.R:Syllable.nn.syl_break is 0)
284
+ ((0.434029 -0.404793))
285
+ ((1.05548 -0.103717)))
286
+ ((0.408372 -0.556145)))
287
+ ((0.712335 -0.118776)))
288
+ ((R:SylStructure.parent.R:Syllable.n.syl_onsetsize < 0.3)
289
+ ((0.379593 -0.658075))
290
+ ((0.549207 -0.494876))))))))
291
+ ((R:SylStructure.parent.position_type is final)
292
+ ((0.597124 -0.649729))
293
+ ((0.628822 -1.03743))))
294
+ ((ph_ctype is s)
295
+ ((R:Segment.n.ph_ctype is r)
296
+ ((R:SylStructure.parent.syl_out < 8.4)
297
+ ((0.760328 0.31651))
298
+ ((0.738363 -0.0177161)))
299
+ ((R:Segment.n.ph_ctype is l)
300
+ ((0.649328 -0.108791))
301
+ ((0.594945 -0.712753))))
302
+ ((ph_vlng is s)
303
+ ((R:Segment.n.ph_ctype is s)
304
+ ((R:Segment.n.ph_cplace is v)
305
+ ((R:Segment.nn.ph_cplace is a)
306
+ ((0.583211 0.0724331))
307
+ ((0.434605 -0.229857)))
308
+ ((R:Segment.p.ph_cplace is a)
309
+ ((R:SylStructure.parent.position_type is single)
310
+ ((0.785502 -0.00061573))
311
+ ((0.544995 -0.432984)))
312
+ ((R:Segment.nn.ph_cplace is 0)
313
+ ((0.507071 -0.715041))
314
+ ((R:SylStructure.parent.R:Syllable.nn.syl_break is 0)
315
+ ((0.506404 -0.573733))
316
+ ((0.62466 -0.3356))))))
317
+ ((R:Segment.p.ph_cplace is l)
318
+ ((0.571756 -0.819693))
319
+ ((lisp_coda_stop is 0)
320
+ ((R:SylStructure.parent.position_type is initial)
321
+ ((0.906891 -0.352911))
322
+ ((R:Segment.n.ph_ctype is r)
323
+ ((0.620335 -0.445714))
324
+ ((R:SylStructure.parent.parent.word_numsyls < 2.5)
325
+ ((R:Segment.p.ph_cvox is +)
326
+ ((R:SylStructure.parent.R:Syllable.nn.syl_break is 0)
327
+ ((0.484057 -0.781483))
328
+ ((0.653917 -0.615429)))
329
+ ((0.754814 -0.531845)))
330
+ ((0.493988 -0.881596)))))
331
+ ((0.792979 -0.32648)))))
332
+ ((R:Segment.p.ph_cvox is +)
333
+ ((R:SylStructure.parent.R:Syllable.n.syl_onsetsize < 0.3)
334
+ ((lisp_coda_stop is 0)
335
+ ((0.913526 -0.195111))
336
+ ((0.56564 -0.64867)))
337
+ ((R:SylStructure.parent.position_type is single)
338
+ ((R:Segment.n.ph_cplace is a)
339
+ ((R:SylStructure.parent.R:Syllable.nn.syl_break is 1)
340
+ ((0.790882 -0.488954))
341
+ ((0.780221 -0.185138)))
342
+ ((0.487794 -0.691338)))
343
+ ((R:Segment.p.ph_ctype is n)
344
+ ((R:SylStructure.parent.R:Syllable.nn.syl_break is 1)
345
+ ((0.595729 -0.771698))
346
+ ((0.57908 -1.06592)))
347
+ ((R:Segment.pp.ph_vfront is 0)
348
+ ((0.591417 -0.784735))
349
+ ((0.486298 -0.436971))))))
350
+ ((ph_vlng is 0)
351
+ ((0.629869 -0.960652))
352
+ ((R:Segment.n.ph_ctype is r)
353
+ ((R:Segment.nn.ph_cplace is 0)
354
+ ((0.591783 -0.671576))
355
+ ((R:Segment.nn.ph_cvox is +)
356
+ ((0.365135 -0.822844))
357
+ ((0.428573 -0.988434))))
358
+ ((lisp_coda_stop is 0)
359
+ ((R:Segment.p.ph_cplace is a)
360
+ ((R:Segment.n.ph_cplace is a)
361
+ ((0.428189 -0.730057))
362
+ ((0.337443 -0.861764)))
363
+ ((0.57354 -0.494602)))
364
+ ((0.497606 -0.414451))))))))))
365
+ ((ph_vlng is l)
366
+ ((R:Segment.pp.ph_vfront is 1)
367
+ ((0.937199 0.833877))
368
+ ((R:SylStructure.parent.syl_out < 12.7)
369
+ ((0.729202 0.344121))
370
+ ((0.71086 0.101855))))
371
+ ((syl_initial is 0)
372
+ ((R:Segment.p.ph_ctype is r)
373
+ ((R:Segment.nn.ph_cplace is a)
374
+ ((0.844815 0.175273))
375
+ ((0.662523 -0.297527)))
376
+ ((ph_vlng is 0)
377
+ ((R:Segment.p.ph_ctype is s)
378
+ ((R:SylStructure.parent.syl_out < 14.6)
379
+ ((R:SylStructure.parent.R:Syllable.nn.syl_break is 0)
380
+ ((0.665332 -0.610529))
381
+ ((0.42276 -0.848942)))
382
+ ((0.427946 -0.980726)))
383
+ ((R:SylStructure.parent.position_type is single)
384
+ ((R:SylStructure.parent.R:Syllable.n.syl_break is 1)
385
+ ((0.523367 -0.825038))
386
+ ((0.635654 -0.535303)))
387
+ ((R:SylStructure.parent.position_type is final)
388
+ ((0.515996 -0.707614))
389
+ ((ph_cplace is a)
390
+ ((lisp_coda_stop is 0)
391
+ ((0.689738 0.0446601))
392
+ ((0.698347 -0.268593)))
393
+ ((R:Segment.nn.ph_cplace is a)
394
+ ((0.706504 -0.659172))
395
+ ((0.775589 -0.201769)))))))
396
+ ((0.79472 -0.0539192))))
397
+ ((ph_ctype is s)
398
+ ((R:SylStructure.parent.position_type is single)
399
+ ((R:Segment.p.ph_ctype is f)
400
+ ((0.641302 0.532411))
401
+ ((R:Segment.n.ph_vrnd is +)
402
+ ((0.800655 0.325651))
403
+ ((0.894711 0.0487864))))
404
+ ((R:SylStructure.parent.position_type is initial)
405
+ ((R:Segment.nn.ph_cplace is a)
406
+ ((0.618082 -0.0190591))
407
+ ((0.733637 0.156329)))
408
+ ((ph_cplace is a)
409
+ ((R:SylStructure.parent.parent.word_numsyls < 2.3)
410
+ ((0.372869 -0.0827845))
411
+ ((0.494988 0.0882778)))
412
+ ((0.593526 -0.335404)))))
413
+ ((R:Segment.p.ph_cvox is +)
414
+ ((R:Segment.p.ph_ctype is n)
415
+ ((R:SylStructure.parent.syl_out < 5.4)
416
+ ((1.0207 -0.152517))
417
+ ((R:SylStructure.parent.R:Syllable.p.stress is 0)
418
+ ((0.711277 -0.513467))
419
+ ((0.509207 -0.726794))))
420
+ ((ph_cplace is g)
421
+ ((0.545188 -0.568352))
422
+ ((R:Segment.p.ph_cplace is a)
423
+ ((ph_ctype is n)
424
+ ((0.61149 -0.325094))
425
+ ((R:SylStructure.parent.position_type is single)
426
+ ((R:Segment.p.ph_ctype is r)
427
+ ((0.525282 0.395446))
428
+ ((R:SylStructure.parent.R:Syllable.n.syl_break is 1)
429
+ ((0.85778 0.0760293))
430
+ ((0.704055 0.290369))))
431
+ ((R:Segment.pp.ph_vfront is 0)
432
+ ((0.590093 0.136983))
433
+ ((0.734563 -0.0570759)))))
434
+ ((R:Segment.pp.ph_vfront is 2)
435
+ ((0.519485 -0.477174))
436
+ ((0.707546 -0.13584))))))
437
+ ((R:SylStructure.parent.position_type is single)
438
+ ((R:Segment.p.ph_ctype is f)
439
+ ((0.797877 0.00462775))
440
+ ((R:Segment.pp.ph_vfront is 1)
441
+ ((0.852184 -0.259914))
442
+ ((0.65313 -0.492506))))
443
+ ((R:SylStructure.parent.position_type is initial)
444
+ ((0.662516 -0.45585))
445
+ ((lisp_onset_glide is 0)
446
+ ((0.652534 -0.652428))
447
+ ((0.482818 -0.885728))))))))))))
448
+ ((syl_initial is 0)
449
+ ((ph_cplace is 0)
450
+ ((R:SylStructure.parent.position_type is single)
451
+ ((R:Segment.n.ph_ctype is f)
452
+ ((R:Segment.p.ph_cplace is a)
453
+ ((R:Segment.n.ph_cplace is a)
454
+ ((R:Segment.pp.ph_vfront is 0)
455
+ ((1.06157 1.30945))
456
+ ((1.12041 1.85843)))
457
+ ((1.05622 0.921414)))
458
+ ((R:Segment.nn.ph_cvox is -)
459
+ ((1.03073 0.916168))
460
+ ((1.06857 0.452851))))
461
+ ((R:Segment.p.ph_ctype is r)
462
+ ((R:Segment.n.ph_cplace is v)
463
+ ((1.22144 0.672433))
464
+ ((R:Segment.p.ph_cplace is l)
465
+ ((0.859749 -0.315152))
466
+ ((R:Segment.nn.ph_cvox is -)
467
+ ((0.89862 0.131037))
468
+ ((0.760033 -0.121252)))))
469
+ ((R:SylStructure.parent.syl_out < 8.8)
470
+ ((R:SylStructure.parent.syl_out < 0.8)
471
+ ((1.06821 1.63716))
472
+ ((R:Segment.n.ph_cplace is a)
473
+ ((R:Segment.p.ph_cvox is +)
474
+ ((1.04477 0.581686))
475
+ ((R:Segment.nn.ph_cvox is +)
476
+ ((0.769059 0.301576))
477
+ ((0.953428 0.0764058))))
478
+ ((R:Segment.p.ph_cplace is a)
479
+ ((1.01367 0.507761))
480
+ ((1.2827 0.945031)))))
481
+ ((R:Segment.n.ph_cplace is l)
482
+ ((0.618397 -0.0873608))
483
+ ((R:SylStructure.parent.R:Syllable.n.syl_break is 0)
484
+ ((R:Segment.p.ph_cvox is +)
485
+ ((0.817182 0.477262))
486
+ ((0.792181 -0.0592145)))
487
+ ((R:SylStructure.parent.R:Syllable.p.stress is 0)
488
+ ((R:SylStructure.parent.syl_out < 16)
489
+ ((0.995411 0.497843))
490
+ ((0.784087 0.152266)))
491
+ ((1.11816 0.716352))))))))
492
+ ((R:Segment.n.ph_ctype is f)
493
+ ((R:SylStructure.parent.position_type is final)
494
+ ((1.35724 1.06028))
495
+ ((R:Segment.p.ph_ctype is r)
496
+ ((R:SylStructure.parent.syl_out < 8.6)
497
+ ((0.511716 -0.0833005))
498
+ ((0.492142 -0.30212)))
499
+ ((R:Segment.n.ph_cplace is b)
500
+ ((0.53059 0.00266551))
501
+ ((R:SylStructure.parent.parent.word_numsyls < 2.3)
502
+ ((ph_vlng is l)
503
+ ((0.433396 0.821463))
504
+ ((0.66915 0.415614)))
505
+ ((0.501369 0.154721))))))
506
+ ((R:SylStructure.parent.position_type is final)
507
+ ((R:Segment.n.ph_ctype is s)
508
+ ((1.03896 0.524706))
509
+ ((R:SylStructure.parent.R:Syllable.p.stress is 0)
510
+ ((1.15147 0.428386))
511
+ ((R:Segment.p.ph_cplace is a)
512
+ ((0.919929 0.0314637))
513
+ ((0.716168 -0.366629)))))
514
+ ((R:SylStructure.parent.R:Syllable.n.syl_break is 4)
515
+ ((0.816778 0.408786))
516
+ ((lisp_onset_glide is 0)
517
+ ((R:Segment.p.ph_ctype is n)
518
+ ((R:Segment.n.ph_ctype is s)
519
+ ((0.532911 -0.153851))
520
+ ((0.633518 -0.762353)))
521
+ ((R:Segment.p.ph_cvox is -)
522
+ ((R:Segment.p.ph_cplace is g)
523
+ ((0.618376 -0.593197))
524
+ ((R:SylStructure.parent.R:Syllable.n.syl_break is 1)
525
+ ((R:Segment.pp.ph_vfront is 0)
526
+ ((R:Segment.n.ph_ctype is n)
527
+ ((0.554085 -0.058903))
528
+ ((R:Segment.p.ph_cplace is a)
529
+ ((0.59842 -0.174458))
530
+ ((0.585539 -0.349335))))
531
+ ((0.500857 -0.416613)))
532
+ ((R:SylStructure.parent.syl_out < 7)
533
+ ((0.616683 -0.00213272))
534
+ ((0.631444 -0.141773)))))
535
+ ((R:SylStructure.parent.R:Syllable.n.syl_break is 0)
536
+ ((0.5198 -0.151901))
537
+ ((ph_vlng is s)
538
+ ((0.677428 0.203522))
539
+ ((0.780789 0.375429))))))
540
+ ((R:Segment.nn.ph_cplace is a)
541
+ ((R:SylStructure.parent.R:Syllable.p.stress is 0)
542
+ ((0.594604 -0.27832))
543
+ ((0.736114 -0.422756)))
544
+ ((R:Segment.p.ph_cplace is a)
545
+ ((R:SylStructure.parent.R:Syllable.nn.syl_break is 1)
546
+ ((0.512186 -0.732785))
547
+ ((0.550759 -0.506471)))
548
+ ((0.47297 -0.791841)))))))))
549
+ ((R:Segment.p.ph_ctype is 0)
550
+ ((R:SylStructure.parent.position_type is final)
551
+ ((lisp_coda_stop is 0)
552
+ ((ph_ctype is f)
553
+ ((R:Segment.nn.ph_cplace is 0)
554
+ ((1.00978 0.366105))
555
+ ((0.80682 -0.0827529)))
556
+ ((R:Segment.n.ph_cplace is a)
557
+ ((R:Segment.nn.ph_cvox is -)
558
+ ((1.07097 1.77503))
559
+ ((1.14864 1.14754)))
560
+ ((R:Segment.n.ph_vrnd is -)
561
+ ((0.883474 0.286471))
562
+ ((R:SylStructure.parent.R:Syllable.nn.syl_break is 1)
563
+ ((1.22264 0.884142))
564
+ ((1.03401 0.658192))))))
565
+ ((ph_cplace is a)
566
+ ((R:SylStructure.parent.syl_out < 6.4)
567
+ ((R:SylStructure.parent.syl_out < 0.6)
568
+ ((1.07956 0.602849))
569
+ ((1.12301 0.0555897)))
570
+ ((R:SylStructure.parent.R:Syllable.p.stress is 0)
571
+ ((0.898888 -0.17527))
572
+ ((0.940932 0.274301))))
573
+ ((1.10093 -0.68098))))
574
+ ((R:Segment.n.ph_ctype is s)
575
+ ((ph_cplace is v)
576
+ ((0.639932 -1.33353))
577
+ ((R:SylStructure.parent.position_type is single)
578
+ ((R:SylStructure.parent.R:Syllable.n.syl_break is 0)
579
+ ((lisp_coda_stop is 0)
580
+ ((0.822882 -0.131692))
581
+ ((0.971957 -0.385365)))
582
+ ((R:Segment.nn.ph_cvox is -)
583
+ ((1.06611 0.183678))
584
+ ((lisp_coda_stop is 0)
585
+ ((0.967183 0.0925019))
586
+ ((0.876026 -0.230108)))))
587
+ ((ph_ctype is f)
588
+ ((R:SylStructure.parent.syl_out < 13)
589
+ ((0.589198 -0.655594))
590
+ ((0.476651 -0.926625)))
591
+ ((R:SylStructure.parent.syl_out < 5)
592
+ ((0.682936 -0.227662))
593
+ ((R:SylStructure.parent.R:Syllable.p.stress is 0)
594
+ ((R:Segment.nn.ph_cplace is a)
595
+ ((0.447309 -0.700998))
596
+ ((0.626113 -0.468853)))
597
+ ((0.657893 -0.383607)))))))
598
+ ((ph_ctype is r)
599
+ ((R:Segment.nn.ph_cvox is -)
600
+ ((1.15158 1.15233))
601
+ ((R:Segment.n.ph_vrnd is -)
602
+ ((1.05554 0.533749))
603
+ ((0.955478 0.0841894))))
604
+ ((ph_ctype is l)
605
+ ((R:Segment.n.ph_ctype is 0)
606
+ ((R:Segment.nn.ph_cplace is a)
607
+ ((0.766431 0.28943))
608
+ ((1.48633 1.09574)))
609
+ ((R:SylStructure.parent.position_type is single)
610
+ ((1.01777 0.474653))
611
+ ((0.545859 -0.402743))))
612
+ ((R:SylStructure.parent.syl_out < 4.8)
613
+ ((R:Segment.n.ph_vc is +)
614
+ ((ph_ctype is n)
615
+ ((0.776645 -0.433859))
616
+ ((R:SylStructure.parent.R:Syllable.p.stress is 0)
617
+ ((R:SylStructure.parent.R:Syllable.nn.syl_break is 0)
618
+ ((0.776179 0.23435))
619
+ ((R:SylStructure.parent.parent.word_numsyls < 2.2)
620
+ ((0.744272 -0.0859672))
621
+ ((0.782605 0.115647))))
622
+ ((0.626541 -0.167615))))
623
+ ((R:Segment.n.seg_onsetcoda is coda)
624
+ ((1.28499 0.864144))
625
+ ((ph_cplace is a)
626
+ ((0.926103 0.0435837))
627
+ ((0.839172 -0.189514)))))
628
+ ((R:Segment.n.ph_ctype is n)
629
+ ((R:SylStructure.parent.R:Syllable.n.syl_onsetsize < 0.1)
630
+ ((0.973489 -0.203415))
631
+ ((0.777589 -0.849733)))
632
+ ((ph_ctype is n)
633
+ ((R:SylStructure.parent.position_type is initial)
634
+ ((R:Segment.n.ph_vc is +)
635
+ ((0.743482 -0.53384))
636
+ ((0.619309 -0.0987861)))
637
+ ((R:SylStructure.parent.R:Syllable.n.syl_onsetsize < 0.2)
638
+ ((1.15555 0.0786295))
639
+ ((1.06689 0.681662))))
640
+ ((R:Segment.n.ph_ctype is r)
641
+ ((R:SylStructure.parent.syl_out < 8.9)
642
+ ((0.752079 -0.237421))
643
+ ((R:SylStructure.parent.R:Syllable.p.stress is 0)
644
+ ((0.664182 -0.041521))
645
+ ((0.772712 0.103499))))
646
+ ((R:Segment.n.seg_onsetcoda is coda)
647
+ ((R:SylStructure.parent.position_type is mid)
648
+ ((R:SylStructure.parent.parent.word_numsyls < 3.3)
649
+ ((0.715944 -0.275113))
650
+ ((0.675729 0.202848)))
651
+ ((R:Segment.n.ph_vrnd is -)
652
+ ((R:SylStructure.parent.syl_out < 8.3)
653
+ ((ph_ctype is s)
654
+ ((0.82747 -0.116723))
655
+ ((0.689586 -0.303909)))
656
+ ((R:SylStructure.parent.syl_out < 17.7)
657
+ ((R:SylStructure.parent.R:Syllable.nn.syl_break is 0)
658
+ ((0.659686 -0.621268))
659
+ ((ph_cplace is a)
660
+ ((0.861741 -0.285324))
661
+ ((0.507102 -0.444082))))
662
+ ((0.850664 -0.269084))))
663
+ ((R:SylStructure.parent.R:Syllable.nn.syl_break is 0)
664
+ ((0.878643 -0.255833))
665
+ ((0.98882 0.115252)))))
666
+ ((ph_cplace is a)
667
+ ((R:SylStructure.parent.syl_out < 13)
668
+ ((0.850625 -0.289333))
669
+ ((0.788154 -0.44844)))
670
+ ((0.70482 -0.630276))))))))))))
671
+ ((R:Segment.p.ph_ctype is l)
672
+ ((R:SylStructure.parent.position_type is single)
673
+ ((0.873748 -0.21639))
674
+ ((lisp_coda_stop is 0)
675
+ ((0.71002 0.428132))
676
+ ((0.703501 0.015833))))
677
+ ((ph_vlng is 0)
678
+ ((R:Segment.p.ph_ctype is r)
679
+ ((R:SylStructure.parent.position_type is initial)
680
+ ((0.907151 -0.494409))
681
+ ((ph_ctype is s)
682
+ ((0.782539 -0.398555))
683
+ ((R:Segment.p.ph_cplace is 0)
684
+ ((0.767435 -0.298857))
685
+ ((0.767046 0.151217)))))
686
+ ((ph_cplace is a)
687
+ ((R:Segment.n.ph_ctype is r)
688
+ ((R:SylStructure.parent.R:Syllable.p.stress is 0)
689
+ ((0.689367 0.0195991))
690
+ ((0.64446 -0.256648)))
691
+ ((R:Segment.n.ph_vc is +)
692
+ ((ph_ctype is s)
693
+ ((R:Segment.nn.ph_cvox is +)
694
+ ((R:SylStructure.parent.R:Syllable.nn.syl_break is 1)
695
+ ((0.59482 -0.214443))
696
+ ((0.745691 0.0292177)))
697
+ ((0.523103 -0.391245)))
698
+ ((R:Segment.p.ph_cvox is +)
699
+ ((R:Segment.p.ph_cplace is a)
700
+ ((0.524304 -0.428306))
701
+ ((0.605117 -0.165604)))
702
+ ((R:Segment.p.ph_ctype is f)
703
+ ((0.491251 -0.455353))
704
+ ((lisp_coda_stop is 0)
705
+ ((R:SylStructure.parent.R:Syllable.n.syl_break is 1)
706
+ ((0.175021 -1.02136))
707
+ ((0.264113 -0.976809)))
708
+ ((R:SylStructure.parent.R:Syllable.n.syl_onsetsize < 0.3)
709
+ ((0.704803 -0.716976))
710
+ ((0.300317 -0.924727)))))))
711
+ ((ph_ctype is f)
712
+ ((R:SylStructure.parent.syl_out < 13)
713
+ ((R:Segment.n.ph_ctype is s)
714
+ ((0.731994 -0.711044))
715
+ ((0.768008 -0.415076)))
716
+ ((0.691821 -0.803284)))
717
+ ((R:Segment.nn.ph_cplace is 0)
718
+ ((R:Segment.n.ph_cplace is a)
719
+ ((0.569567 -0.993506))
720
+ ((0.689849 -0.761696)))
721
+ ((0.386818 -1.14744))))))
722
+ ((R:Segment.p.seg_onsetcoda is coda)
723
+ ((R:Segment.p.ph_cplace is a)
724
+ ((0.746337 -0.866206))
725
+ ((0.532751 -1.22185)))
726
+ ((ph_cplace is l)
727
+ ((0.74942 -0.820648))
728
+ ((0.685988 -0.298146))))))
729
+ ((0.812766 0.17291))))))
730
+ ((R:SylStructure.parent.position_type is mid)
731
+ ((ph_ctype is r)
732
+ ((0.577775 -0.54714))
733
+ ((R:Segment.n.ph_ctype is f)
734
+ ((R:SylStructure.parent.R:Syllable.nn.syl_break is 0)
735
+ ((0.370448 0.00076407))
736
+ ((0.460385 0.20631)))
737
+ ((R:Segment.p.ph_cvox is -)
738
+ ((ph_vlng is 0)
739
+ ((0.615959 -0.57434))
740
+ ((0.50852 -0.197814)))
741
+ ((R:Segment.n.ph_ctype is 0)
742
+ ((1.34281 0.477163))
743
+ ((R:SylStructure.parent.R:Syllable.nn.syl_break is 1)
744
+ ((0.59975 -0.1342))
745
+ ((0.640294 -0.32653)))))))
746
+ ((R:Segment.n.ph_ctype is f)
747
+ ((R:SylStructure.parent.position_type is initial)
748
+ ((0.758739 0.311943))
749
+ ((R:Segment.n.seg_onsetcoda is coda)
750
+ ((R:Segment.p.ph_ctype is f)
751
+ ((1.28746 1.99771))
752
+ ((R:Segment.pp.ph_vfront is 1)
753
+ ((1.42474 1.76925))
754
+ ((R:SylStructure.parent.R:Syllable.n.syl_break is 1)
755
+ ((0.979414 1.37583))
756
+ ((1.00321 1.06671)))))
757
+ ((1.15222 0.852004))))
758
+ ((R:Segment.p.ph_ctype is 0)
759
+ ((R:Segment.n.ph_ctype is s)
760
+ ((R:SylStructure.parent.R:Syllable.nn.syl_break is 1)
761
+ ((0.664807 -0.0880262))
762
+ ((0.573589 0.217234)))
763
+ ((ph_ctype is s)
764
+ ((ph_cplace is l)
765
+ ((0.800348 0.66579))
766
+ ((ph_cplace is a)
767
+ ((0.859133 1.46854))
768
+ ((R:SylStructure.parent.position_type is single)
769
+ ((0.692229 1.23671))
770
+ ((0.552426 0.923928)))))
771
+ ((R:SylStructure.parent.syl_out < 9.2)
772
+ ((R:SylStructure.parent.position_type is single)
773
+ ((R:SylStructure.parent.syl_out < 3.6)
774
+ ((1.01673 1.26824))
775
+ ((0.848274 0.92375)))
776
+ ((R:SylStructure.parent.R:Syllable.n.syl_break is 1)
777
+ ((R:Segment.nn.ph_cplace is a)
778
+ ((0.788163 0.818855))
779
+ ((0.822028 1.01227)))
780
+ ((0.8365 0.483313))))
781
+ ((lisp_coda_stop is 0)
782
+ ((R:Segment.nn.ph_cvox is +)
783
+ ((R:SylStructure.parent.R:Syllable.n.syl_onsetsize < 0.2)
784
+ ((0.807795 0.670829))
785
+ ((0.773774 0.435486)))
786
+ ((0.849529 0.103561)))
787
+ ((0.858848 0.763836))))))
788
+ ((R:Segment.n.ph_vrnd is -)
789
+ ((ph_vlng is 0)
790
+ ((R:SylStructure.parent.position_type is final)
791
+ ((ph_cplace is a)
792
+ ((R:Segment.nn.ph_cvox is -)
793
+ ((0.691915 -0.42124))
794
+ ((R:Segment.p.ph_cplace is a)
795
+ ((0.773696 0.354001))
796
+ ((0.65495 -0.14321))))
797
+ ((0.610433 -0.479739)))
798
+ ((R:Segment.p.ph_ctype is r)
799
+ ((R:SylStructure.parent.R:Syllable.n.syl_break is 0)
800
+ ((0.560921 0.384674))
801
+ ((0.895267 0.746476)))
802
+ ((R:Segment.p.ph_ctype is l)
803
+ ((0.704694 0.568012))
804
+ ((R:Segment.p.ph_cplace is b)
805
+ ((1.34739 0.539049))
806
+ ((R:Segment.p.ph_ctype is s)
807
+ ((R:SylStructure.parent.syl_out < 12.9)
808
+ ((R:SylStructure.parent.R:Syllable.p.stress is 0)
809
+ ((0.807285 0.151429))
810
+ ((0.988033 0.383763)))
811
+ ((0.878655 0.102291)))
812
+ ((ph_ctype is n)
813
+ ((0.759582 -0.315096))
814
+ ((R:SylStructure.parent.syl_out < 8.8)
815
+ ((R:Segment.pp.ph_vfront is 0)
816
+ ((0.846546 0.000647117))
817
+ ((R:Segment.pp.ph_vfront is 1)
818
+ ((0.586216 0.150701))
819
+ ((0.793898 0.379041))))
820
+ ((lisp_coda_stop is 0)
821
+ ((ph_ctype is f)
822
+ ((0.74736 -0.31103))
823
+ ((0.715751 -0.00576581)))
824
+ ((0.914486 0.17528))))))))))
825
+ ((1.24204 0.908819)))
826
+ ((ph_ctype is s)
827
+ ((ph_cplace is a)
828
+ ((0.864408 1.35528))
829
+ ((R:Segment.n.seg_onsetcoda is coda)
830
+ ((0.85602 0.344576))
831
+ ((0.869622 0.659223))))
832
+ ((R:Segment.nn.ph_cvox is -)
833
+ ((R:Segment.n.ph_ctype is s)
834
+ ((R:Segment.nn.ph_cplace is 0)
835
+ ((0.942964 1.27475))
836
+ ((0.978218 0.650268)))
837
+ ((R:SylStructure.parent.syl_out < 3.9)
838
+ ((R:SylStructure.parent.R:Syllable.p.stress is 0)
839
+ ((1.32463 1.05026))
840
+ ((0.896966 0.417727)))
841
+ ((R:Segment.p.ph_cplace is a)
842
+ ((R:SylStructure.parent.R:Syllable.nn.syl_break is 0)
843
+ ((0.776698 0.195369))
844
+ ((0.969518 0.432394)))
845
+ ((0.799096 -0.0203318)))))
846
+ ((ph_cplace is a)
847
+ ((R:SylStructure.parent.R:Syllable.p.stress is 0)
848
+ ((0.680861 -0.315846))
849
+ ((R:SylStructure.parent.R:Syllable.nn.syl_break is 1)
850
+ ((0.954393 0.0965487))
851
+ ((0.884928 0.372884))))
852
+ ((lisp_coda_stop is 0)
853
+ ((R:SylStructure.parent.R:Syllable.p.stress is 0)
854
+ ((R:SylStructure.parent.position_type is final)
855
+ ((1.03696 0.565834))
856
+ ((0.906661 0.277961)))
857
+ ((R:SylStructure.parent.position_type is final)
858
+ ((0.778429 -0.0967381))
859
+ ((0.863993 0.314023))))
860
+ ((R:Segment.p.ph_cplace is a)
861
+ ((R:SylStructure.parent.R:Syllable.p.stress is 0)
862
+ ((0.898898 0.571009))
863
+ ((0.830278 0.787486)))
864
+ ((1.1101 0.333888)))))))))))))
865
+ ;; RMSE 0.7726 Correlation is 0.5943 Mean (abs) Error 0.5752 (0.5160)
866
+
867
+ ))
868
+
869
+ (provide 'f2bdurtreeZ)
CosyVoice-ttsfrd/resource/festival/f2bf0lr.scm ADDED
@@ -0,0 +1,314 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2
+ ;;; ;;
3
+ ;;; Centre for Speech Technology Research ;;
4
+ ;;; University of Edinburgh, UK ;;
5
+ ;;; Copyright (c) 1996,1997 ;;
6
+ ;;; All Rights Reserved. ;;
7
+ ;;; ;;
8
+ ;;; Permission is hereby granted, free of charge, to use and distribute ;;
9
+ ;;; this software and its documentation without restriction, including ;;
10
+ ;;; without limitation the rights to use, copy, modify, merge, publish, ;;
11
+ ;;; distribute, sublicense, and/or sell copies of this work, and to ;;
12
+ ;;; permit persons to whom this work is furnished to do so, subject to ;;
13
+ ;;; the following conditions: ;;
14
+ ;;; 1. The code must retain the above copyright notice, this list of ;;
15
+ ;;; conditions and the following disclaimer. ;;
16
+ ;;; 2. Any modifications must be clearly marked as such. ;;
17
+ ;;; 3. Original authors' names are not deleted. ;;
18
+ ;;; 4. The authors' names are not used to endorse or promote products ;;
19
+ ;;; derived from this software without specific prior written ;;
20
+ ;;; permission. ;;
21
+ ;;; ;;
22
+ ;;; THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK ;;
23
+ ;;; DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ;;
24
+ ;;; ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT ;;
25
+ ;;; SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE ;;
26
+ ;;; FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES ;;
27
+ ;;; WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN ;;
28
+ ;;; AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ;;
29
+ ;;; ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF ;;
30
+ ;;; THIS SOFTWARE. ;;
31
+ ;;; ;;
32
+ ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
33
+ ;;;
34
+ ;;; First attempt at a linear regression model to predict F0 values.
35
+ ;;; This is an attempt to reimplement the work in Black and
36
+ ;;; Hunt ICSLP96, though this model probably isn't as good.
37
+ ;;;
38
+
39
+ ;;;start
40
+ ;;; R2 = 0.251, F(74, 12711) = 57.5, Prob>F = 0.000
41
+ ;;; RMSE = 27.877
42
+ ;;;mid
43
+ ;;; R2 = 0.332, F(74, 12711) = 85.6, Prob>F = 0.000
44
+ ;;; RMSE = 28.293
45
+ ;;;end
46
+ ;;; R2 = 0.292, F(74, 12711) = 70.8, Prob>F = 0.000
47
+ ;;; RMSE = 27.139
48
+
49
+ (define (emph_syl syl) ; Emphasis value for item SYL: 2.0 when its tobi_accent is not NONE and the EMPH feature reached through the path below equals 1, otherwise 0.0. Not referenced anywhere else in this file.
50
+ (if (string-equal (item.feat syl "tobi_accent") "NONE") ; accent feature is NONE: no emphasis, whatever EMPH says
51
+ 0.0
52
+ (if (string-equal (item.feat ; EMPH is compared as a string against 1
53
+ syl "R:SylStructure.parent.R:Token.parent.EMPH") "1")
54
+ 2.0 ; accent is not NONE and EMPH is 1
55
+ 0.0))) ; accent is not NONE but EMPH is anything other than 1
56
+
57
+ (set! f2b_f0_lr_start ; Term table of the linear-regression F0 model labelled start in the file header (fit statistics are listed there). NOTE(review): presumably the F0 target at the start of a syllable - confirm against the code that consumes this variable.
58
+ '( ; quoted: the entries below are plain data and are never evaluated
59
+ ( Intercept 160.584956 ) ; constant term of the regression
60
+ ( R:SylStructure.parent.R:Token.parent.EMPH 10.0 ) ; two-element entry: feature path and its weight
61
+ ( pp.tobi_accent 10.081770 (H*) ) ; three-element entry: feature, weight, list of feature values. NOTE(review): presumably the weight applies only when the feature value is in the list - confirm against the consumer.
62
+ ( pp.tobi_accent 3.358613 (!H*) )
63
+ ( pp.tobi_accent 4.144342 (*? X*? H*!H* * L+H* L+!H*) )
64
+ ( pp.tobi_accent -1.111794 (L*) )
65
+ ( pp.tobi_accent 19.646313 (L*+H L*+!H) )
66
+ ( p.tobi_accent 32.081029 (H*) )
67
+ ( p.tobi_accent 18.090033 (!H*) )
68
+ ( p.tobi_accent 23.255280 (*? X*? H*!H* * L+H* L+!H*) )
69
+ ( p.tobi_accent -9.623577 (L*) )
70
+ ( p.tobi_accent 26.517095 (L*+H L*+!H) )
71
+ ( tobi_accent 5.221081 (H*) )
72
+ ( tobi_accent 10.159194 (!H*) )
73
+ ( tobi_accent 3.645511 (*? X*? H*!H* * L+H* L+!H*) )
74
+ ( tobi_accent -5.720030 (L*) )
75
+ ( tobi_accent -6.355773 (L*+H L*+!H) )
76
+ ( n.tobi_accent -5.691933 (H*) )
77
+ ( n.tobi_accent 8.265606 (!H*) )
78
+ ( n.tobi_accent 0.861427 (*? X*? H*!H* * L+H* L+!H*) )
79
+ ( n.tobi_accent 1.270504 (L*) )
80
+ ( n.tobi_accent 3.499418 (L*+H L*+!H) )
81
+ ( nn.tobi_accent -3.785701 (H*) )
82
+ ( nn.tobi_accent 7.013446 (!H*) )
83
+ ( nn.tobi_accent 2.637494 (*? X*? H*!H* * L+H* L+!H*) )
84
+ ( nn.tobi_accent -0.392176 (L*) )
85
+ ( nn.tobi_accent -2.957502 (L*+H L*+!H) )
86
+ ( pp.tobi_endtone -3.531153 (L-L%) )
87
+ ( pp.tobi_endtone 0.131156 (L-) )
88
+ ( pp.tobi_endtone 2.729199 (H-L% !H-L% -X?) )
89
+ ( pp.tobi_endtone 8.258756 (L-H%) )
90
+ ( pp.tobi_endtone 5.836487 (H-) )
91
+ ( pp.tobi_endtone 11.213440 (!H- H-H%) )
92
+ ( R:Syllable.p.tobi_endtone -28.081359 (L-L%) )
93
+ ( R:Syllable.p.tobi_endtone -20.553145 (L-) )
94
+ ( R:Syllable.p.tobi_endtone -5.442577 (H-L% !H-L% -X?) )
95
+ ( R:Syllable.p.tobi_endtone -6.585836 (L-H%) )
96
+ ( R:Syllable.p.tobi_endtone 8.537044 (H-) )
97
+ ( R:Syllable.p.tobi_endtone 4.243342 (!H- H-H%) )
98
+ ( tobi_endtone -9.333926 (L-L%) )
99
+ ( tobi_endtone -0.346711 (L-) )
100
+ ( tobi_endtone -0.507352 (H-L% !H-L% -X?) )
101
+ ( tobi_endtone -0.937483 (L-H%) )
102
+ ( tobi_endtone 9.472265 (H-) )
103
+ ( tobi_endtone 14.256898 (!H- H-H%) )
104
+ ( n.tobi_endtone -13.084253 (L-L%) )
105
+ ( n.tobi_endtone -1.060688 (L-) )
106
+ ( n.tobi_endtone -7.947205 (H-L% !H-L% -X?) )
107
+ ( n.tobi_endtone -5.471592 (L-H%) )
108
+ ( n.tobi_endtone -0.095669 (H-) )
109
+ ( n.tobi_endtone 4.933708 (!H- H-H%) )
110
+ ( nn.tobi_endtone -14.993470 (L-L%) )
111
+ ( nn.tobi_endtone -3.784284 (L-) )
112
+ ( nn.tobi_endtone -15.505132 (H-L% !H-L% -X?) )
113
+ ( nn.tobi_endtone -11.352400 (L-H%) )
114
+ ( nn.tobi_endtone -5.551627 (H-) )
115
+ ( nn.tobi_endtone -0.661581 (!H- H-H%) )
116
+ ( pp.old_syl_break -3.367677 )
117
+ ( p.old_syl_break 0.641755 )
118
+ ( old_syl_break -0.659002 )
119
+ ( n.old_syl_break 1.217358 )
120
+ ( nn.old_syl_break 2.974502 )
121
+ ( pp.stress 1.588098 )
122
+ ( p.stress 3.693430 )
123
+ ( stress 2.009843 )
124
+ ( n.stress 1.645560 )
125
+ ( nn.stress 1.926870 )
126
+ ( syl_in 1.048362 )
127
+ ( syl_out 0.315553 )
128
+ ( ssyl_in -2.096079 )
129
+ ( ssyl_out 0.303531 )
130
+ ( asyl_in -4.257915 )
131
+ ( asyl_out -2.422424 )
132
+ ( last_accent -0.397647 )
133
+ ( next_accent -0.418613 )
134
+ ( sub_phrases -5.472055 )
135
+ ))
136
+
137
+ (set! f2b_f0_lr_mid ; Term table of the linear-regression F0 model labelled mid in the file header (fit statistics are listed there). Same features, value groups and row order as f2b_f0_lr_start; only the weights differ. NOTE(review): presumably the mid-syllable F0 target - confirm against the consumer.
138
+ '( ; quoted: the entries below are plain data and are never evaluated
139
+ ( Intercept 169.183377 ) ; constant term of the regression
140
+ ( R:SylStructure.parent.R:Token.parent.EMPH 10.0 ) ; two-element entry: feature path and its weight
141
+ ( pp.tobi_accent 4.923247 (H*) ) ; three-element entry: feature, weight, list of feature values. NOTE(review): presumably the weight applies only when the feature value is in the list - confirm against the consumer.
142
+ ( pp.tobi_accent 0.955474 (!H*) )
143
+ ( pp.tobi_accent 1.193597 (*? X*? H*!H* * L+H* L+!H*) )
144
+ ( pp.tobi_accent 1.501383 (L*) )
145
+ ( pp.tobi_accent 7.992120 (L*+H L*+!H) )
146
+ ( p.tobi_accent 16.603350 (H*) )
147
+ ( p.tobi_accent 11.665814 (!H*) )
148
+ ( p.tobi_accent 13.063298 (*? X*? H*!H* * L+H* L+!H*) )
149
+ ( p.tobi_accent -2.288798 (L*) )
150
+ ( p.tobi_accent 29.168430 (L*+H L*+!H) )
151
+ ( tobi_accent 34.517868 (H*) )
152
+ ( tobi_accent 22.349656 (!H*) )
153
+ ( tobi_accent 23.551548 (*? X*? H*!H* * L+H* L+!H*) )
154
+ ( tobi_accent -14.117284 (L*) )
155
+ ( tobi_accent -5.978760 (L*+H L*+!H) )
156
+ ( n.tobi_accent -1.914945 (H*) )
157
+ ( n.tobi_accent 5.249441 (!H*) )
158
+ ( n.tobi_accent -1.929947 (*? X*? H*!H* * L+H* L+!H*) )
159
+ ( n.tobi_accent -3.287877 (L*) )
160
+ ( n.tobi_accent -4.980375 (L*+H L*+!H) )
161
+ ( nn.tobi_accent -6.147251 (H*) )
162
+ ( nn.tobi_accent 8.408949 (!H*) )
163
+ ( nn.tobi_accent 3.193500 (*? X*? H*!H* * L+H* L+!H*) )
164
+ ( nn.tobi_accent 1.323099 (L*) )
165
+ ( nn.tobi_accent 9.148058 (L*+H L*+!H) )
166
+ ( pp.tobi_endtone 4.255273 (L-L%) )
167
+ ( pp.tobi_endtone -1.033377 (L-) )
168
+ ( pp.tobi_endtone 11.992045 (H-L% !H-L% -X?) )
169
+ ( pp.tobi_endtone 6.989573 (L-H%) )
170
+ ( pp.tobi_endtone 2.598854 (H-) )
171
+ ( pp.tobi_endtone 12.178307 (!H- H-H%) )
172
+ ( R:Syllable.p.tobi_endtone -4.397973 (L-L%) )
173
+ ( R:Syllable.p.tobi_endtone -6.157077 (L-) )
174
+ ( R:Syllable.p.tobi_endtone 5.530608 (H-L% !H-L% -X?) )
175
+ ( R:Syllable.p.tobi_endtone 6.938086 (L-H%) )
176
+ ( R:Syllable.p.tobi_endtone 6.162763 (H-) )
177
+ ( R:Syllable.p.tobi_endtone 8.035727 (!H- H-H%) )
178
+ ( tobi_endtone -19.357902 (L-L%) )
179
+ ( tobi_endtone -13.877759 (L-) )
180
+ ( tobi_endtone -6.176061 (H-L% !H-L% -X?) )
181
+ ( tobi_endtone -7.328882 (L-H%) )
182
+ ( tobi_endtone 12.694193 (H-) )
183
+ ( tobi_endtone 30.923398 (!H- H-H%) )
184
+ ( n.tobi_endtone -17.727785 (L-L%) )
185
+ ( n.tobi_endtone -2.539592 (L-) )
186
+ ( n.tobi_endtone -8.126830 (H-L% !H-L% -X?) )
187
+ ( n.tobi_endtone -8.701685 (L-H%) )
188
+ ( n.tobi_endtone -1.006439 (H-) )
189
+ ( n.tobi_endtone 6.834498 (!H- H-H%) )
190
+ ( nn.tobi_endtone -15.407530 (L-L%) )
191
+ ( nn.tobi_endtone -2.974196 (L-) )
192
+ ( nn.tobi_endtone -12.287673 (H-L% !H-L% -X?) )
193
+ ( nn.tobi_endtone -7.621437 (L-H%) )
194
+ ( nn.tobi_endtone -0.458837 (H-) )
195
+ ( nn.tobi_endtone 3.170632 (!H- H-H%) )
196
+ ( pp.old_syl_break -4.196950 )
197
+ ( p.old_syl_break -5.176929 )
198
+ ( old_syl_break 0.047922 )
199
+ ( n.old_syl_break 2.153968 )
200
+ ( nn.old_syl_break 2.577074 )
201
+ ( pp.stress -2.368192 )
202
+ ( p.stress 1.080493 )
203
+ ( stress 1.135556 )
204
+ ( n.stress 2.447219 )
205
+ ( nn.stress 1.318122 )
206
+ ( syl_in 0.291663 )
207
+ ( syl_out -0.411814 )
208
+ ( ssyl_in -1.643456 )
209
+ ( ssyl_out 0.580589 )
210
+ ( asyl_in -5.649243 )
211
+ ( asyl_out 0.489823 )
212
+ ( last_accent 0.216634 )
213
+ ( next_accent 0.244134 )
214
+ ( sub_phrases -5.758156 )
215
+ ))
216
+
217
+
218
+ (set! f2b_f0_lr_end ; Term table of the linear-regression F0 model labelled end in the file header (fit statistics are listed there). Same features, value groups and row order as f2b_f0_lr_start; only the weights differ. NOTE(review): presumably the F0 target at the end of a syllable - confirm against the consumer.
219
+ '( ; quoted: the entries below are plain data and are never evaluated
220
+ ( Intercept 169.570381 ) ; constant term of the regression
221
+ ( R:SylStructure.parent.R:Token.parent.EMPH 10.0 ) ; two-element entry: feature path and its weight
222
+ ( pp.tobi_accent 3.594771 (H*) ) ; three-element entry: feature, weight, list of feature values. NOTE(review): presumably the weight applies only when the feature value is in the list - confirm against the consumer.
223
+ ( pp.tobi_accent 0.432519 (!H*) )
224
+ ( pp.tobi_accent 0.235664 (*? X*? H*!H* * L+H* L+!H*) )
225
+ ( pp.tobi_accent 1.513892 (L*) )
226
+ ( pp.tobi_accent 2.474823 (L*+H L*+!H) )
227
+ ( p.tobi_accent 11.214208 (H*) )
228
+ ( p.tobi_accent 9.619350 (!H*) )
229
+ ( p.tobi_accent 9.084690 (*? X*? H*!H* * L+H* L+!H*) )
230
+ ( p.tobi_accent 0.519202 (L*) )
231
+ ( p.tobi_accent 26.593112 (L*+H L*+!H) )
232
+ ( tobi_accent 25.217589 (H*) )
233
+ ( tobi_accent 13.759851 (!H*) )
234
+ ( tobi_accent 17.635192 (*? X*? H*!H* * L+H* L+!H*) )
235
+ ( tobi_accent -12.149974 (L*) )
236
+ ( tobi_accent 13.345913 (L*+H L*+!H) )
237
+ ( n.tobi_accent 4.944848 (H*) )
238
+ ( n.tobi_accent 7.398383 (!H*) )
239
+ ( n.tobi_accent 1.683011 (*? X*? H*!H* * L+H* L+!H*) )
240
+ ( n.tobi_accent -6.516900 (L*) )
241
+ ( n.tobi_accent -6.768201 (L*+H L*+!H) )
242
+ ( nn.tobi_accent -4.335797 (H*) )
243
+ ( nn.tobi_accent 5.656462 (!H*) )
244
+ ( nn.tobi_accent 0.263288 (*? X*? H*!H* * L+H* L+!H*) )
245
+ ( nn.tobi_accent 1.022002 (L*) )
246
+ ( nn.tobi_accent 6.702368 (L*+H L*+!H) )
247
+ ( pp.tobi_endtone 10.274958 (L-L%) )
248
+ ( pp.tobi_endtone 3.129947 (L-) )
249
+ ( pp.tobi_endtone 15.476240 (H-L% !H-L% -X?) )
250
+ ( pp.tobi_endtone 10.446935 (L-H%) )
251
+ ( pp.tobi_endtone 6.104384 (H-) )
252
+ ( pp.tobi_endtone 14.182688 (!H- H-H%) )
253
+ ( R:Syllable.p.tobi_endtone 1.767454 (L-L%) )
254
+ ( R:Syllable.p.tobi_endtone -1.040077 (L-) )
255
+ ( R:Syllable.p.tobi_endtone 18.438093 (H-L% !H-L% -X?) )
256
+ ( R:Syllable.p.tobi_endtone 8.750018 (L-H%) )
257
+ ( R:Syllable.p.tobi_endtone 5.000340 (H-) )
258
+ ( R:Syllable.p.tobi_endtone 10.913437 (!H- H-H%) )
259
+ ( tobi_endtone -12.637935 (L-L%) )
260
+ ( tobi_endtone -13.597961 (L-) )
261
+ ( tobi_endtone -6.501965 (H-L% !H-L% -X?) )
262
+ ( tobi_endtone 8.747483 (L-H%) )
263
+ ( tobi_endtone 15.165833 (H-) )
264
+ ( tobi_endtone 50.190326 (!H- H-H%) )
265
+ ( n.tobi_endtone -16.965781 (L-L%) )
266
+ ( n.tobi_endtone -5.222475 (L-) )
267
+ ( n.tobi_endtone -7.358555 (H-L% !H-L% -X?) )
268
+ ( n.tobi_endtone -7.833168 (L-H%) )
269
+ ( n.tobi_endtone 4.701087 (H-) )
270
+ ( n.tobi_endtone 10.349902 (!H- H-H%) )
271
+ ( nn.tobi_endtone -15.369483 (L-L%) )
272
+ ( nn.tobi_endtone -2.207161 (L-) )
273
+ ( nn.tobi_endtone -9.363835 (H-L% !H-L% -X?) )
274
+ ( nn.tobi_endtone -7.052374 (L-H%) )
275
+ ( nn.tobi_endtone 2.207854 (H-) )
276
+ ( nn.tobi_endtone 5.271546 (!H- H-H%) )
277
+ ( pp.old_syl_break -4.745862 )
278
+ ( p.old_syl_break -5.685178 )
279
+ ( old_syl_break -2.633291 )
280
+ ( n.old_syl_break 1.678340 )
281
+ ( nn.old_syl_break 2.274729 )
282
+ ( pp.stress -2.747198 )
283
+ ( p.stress 0.306724 )
284
+ ( stress -0.565613 )
285
+ ( n.stress 2.838327 )
286
+ ( nn.stress 1.285244 )
287
+ ( syl_in 0.169955 )
288
+ ( syl_out -1.045661 )
289
+ ( ssyl_in -1.487774 )
290
+ ( ssyl_out 0.752405 )
291
+ ( asyl_in -5.081677 )
292
+ ( asyl_out 3.016218 )
293
+ ( last_accent 0.312900 )
294
+ ( next_accent 0.837992 )
295
+ ( sub_phrases -5.397805 )
296
+
297
+ ))
298
+
299
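+ ;; groups (the example weights below are copied from f2b_f0_lr_end: tobi_accent_N are its tobi_accent rows, tobi_endtone_N are its pp.tobi_endtone rows)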
300
+ ;; tobi_accent_1 25.217589 (H*) )
301
+ ;; tobi_accent_2 13.759851 (!H*) )
302
+ ;; tobi_accent_3 17.635192 (*? X*? H*!H* * L+H* L+!H*) )
303
+ ;; tobi_accent_4 -12.149974 (L*) )
304
+ ;; tobi_accent_5 13.345913 (L*+H L*+!H) )
305
+
306
+ ;; tobi_endtone_1 10.274958 (L-L%) )
307
+ ;; tobi_endtone_2 3.129947 (L-) )
308
+ ;; tobi_endtone_3 15.476240 (H-L% !H-L% -X?) )
309
+ ;; tobi_endtone_4 10.446935 (L-H%) )
310
+ ;; tobi_endtone_5 6.104384 (H-) )
311
+ ;; tobi_endtone_6 14.182688 (!H- H-H%) )
312
+
313
+ (provide 'f2bf0lr) ; declares the feature name f2bf0lr for this file. NOTE(review): presumably paired with a matching require in the files that load it - confirm.
314
+
CosyVoice-ttsfrd/resource/festival/festdoc.scm ADDED
@@ -0,0 +1,178 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2
+ ;;; ;;
3
+ ;;; Centre for Speech Technology Research ;;
4
+ ;;; University of Edinburgh, UK ;;
5
+ ;;; Copyright (c) 1996,1997 ;;
6
+ ;;; All Rights Reserved. ;;
7
+ ;;; ;;
8
+ ;;; Permission is hereby granted, free of charge, to use and distribute ;;
9
+ ;;; this software and its documentation without restriction, including ;;
10
+ ;;; without limitation the rights to use, copy, modify, merge, publish, ;;
11
+ ;;; distribute, sublicense, and/or sell copies of this work, and to ;;
12
+ ;;; permit persons to whom this work is furnished to do so, subject to ;;
13
+ ;;; the following conditions: ;;
14
+ ;;; 1. The code must retain the above copyright notice, this list of ;;
15
+ ;;; conditions and the following disclaimer. ;;
16
+ ;;; 2. Any modifications must be clearly marked as such. ;;
17
+ ;;; 3. Original authors' names are not deleted. ;;
18
+ ;;; 4. The authors' names are not used to endorse or promote products ;;
19
+ ;;; derived from this software without specific prior written ;;
20
+ ;;; permission. ;;
21
+ ;;; ;;
22
+ ;;; THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK ;;
23
+ ;;; DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ;;
24
+ ;;; ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT ;;
25
+ ;;; SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE ;;
26
+ ;;; FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES ;;
27
+ ;;; WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN ;;
28
+ ;;; AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ;;
29
+ ;;; ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF ;;
30
+ ;;; THIS SOFTWARE. ;;
31
+ ;;; ;;
32
+ ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
33
+ ;;; Author: Alan W Black
34
+ ;;; Date: August 1996
35
+ ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
36
+ ;;; Save documentation strings as texinfo files
37
+ ;;;
38
+ ;;; Finds all functions with documentation, and all variables with
39
+ ;;; documentation, sorts and dumps the information in doc/festfunc.texi
40
+ ;;; and doc/festvars.texi
41
+ ;;;
42
+ ;;; The makefile in the doc directory runs the compiled festival binary and
43
+ ;;; causes these files to be created from the currently defined functions
44
+ ;;; and variables
45
+ ;;;
46
+ ;;; Also provides function to extract manual section for documentation
47
+ ;;; string and send a url to Netscape to display it
48
+ ;;;
49
+
50
(define (make-doc)
  "(make-doc)
Dump the documentation strings of the currently defined functions,
features and variables, writing one texinfo file for each kind."
  (format t "Making function, feature and variable lists\n")

  ;; Only loaded library files contribute documentation, so load those
  ;; whose functions/variables have to appear in the manual.
  (require 'display)
  (require 'mbrola)
  (require 'tilt)

  ;; One texinfo table per kind of documented object.
  (make-a-doc "festfunc.texi" 'function)
  (make-a-doc "festfeat.texi" 'features)
  (make-a-doc "festvars.texi" 'vars))
65
+
66
(define (make-a-doc outfile doclist)
  "(make-a-doc FILENAME DOCLIST)
Write the documentation strings selected by DOCLIST to FILENAME as a
texinfo @table.  DOCLIST names which doclist to dump and may be one
of 'function, 'features or 'vars."
  (let ((port (fopen outfile "wb")))
    (format port "@table @code\n")
    ;; The entries themselves are emitted by sort-and-dump-docstrings,
    ;; which is not defined in this file.
    (sort-and-dump-docstrings doclist port)
    (format port "@end table\n")
    (fclose port)))
77
+
78
+ ;;;
79
+ ;;; Documentation string may refer to a section in the manual
80
+ ;;; If it does then we can automatically go to that section in the
81
+ ;;; menu using Netscape.
82
+ ;;;
83
+
84
(defvar manual-browser "netscape"
  "manual-browser
The Unix program name of the browser used to display the manual.  It
is invoked with Netscape's -remote openURL(...) command line.
[see Getting some help]")

(defvar manual-url
  (format nil "http://www.cstr.ed.ac.uk/projects/festival/manual-%s.%s.%s/"
	  (car festival_version_number)
	  (cadr festival_version_number)
	  (car (cddr festival_version_number)))
  "manual-url
The default URL for the Festival Manual in html format, built from the
major, minor and subminor version numbers.  You may reset this to a
file://.../... type URL on your local machine.
[see Getting some help]")
98
+
99
+ ;;; Paul got this idea from VM, the email system for emacs and
100
+ ;;; I found out how to do this from their code, thanks Kyle
101
+
102
(define (send-url-to-netscape url)
  "(send-url-to-netscape URL)
Ask the browser named by manual-browser to open URL, using the
-remote openURL(...) command line.  This is primarily used to display
parts of the manual referenced in documentation strings."
  ;; NOTE(review): URL is placed inside a double-quoted shell argument
  ;; without any escaping.
  (system
   (format nil "%s -remote \"openURL( %s )\" " manual-browser url)))
112
+
113
(define (lastline string)
  "(lastline STRING)
Returns the part of STRING that lies after its last newline which is
followed by further text; STRING itself when there is no such newline."
  (let ((rest (string-after string "\n")))
    ;; Keep discarding everything up to the next newline until nothing
    ;; would be left over.
    (while (not (string-equal rest ""))
      (set! string rest)
      (set! rest (string-after string "\n")))
    string))
121
+
122
(define (manual-sym symbol)
  "(manual-sym SYMBOL)
Display the manual section that SYMBOL's documentation string marks as
most relevant.  The section is named on the last line of the
documentation string, prefixed by \"[see \" and followed by \"]\"
immediately before the end of the string.  The section name is looked
up in the HTML version of the manual and the resulting URL is sent to
Netscape for display.  When no section is named the documentation
string itself is returned.  [see Getting some help]"
  (let* ((docstring (eval (list 'doc symbol)))
	 (section (string-before
		   (string-after (lastline docstring) "[see ")
		   "]")))
    (if (string-equal section "")
	docstring            ;; no section reference present
	(manual section))))
140
+
141
(define (manual section)
  "(manual SECTION)
Display SECTION in the manual.  SECTION is a string identifying
a manual section (it could be an initial substring).  If SECTION
is nil or unspecified then the Manual table of contents is displayed.
This uses netscape to display the manual page so you must have that
(use variable manual-browser to identify it) and the variable
manual-url pointing to a copy of the manual.  [see Getting some help]"
  (let ((tmpfile (make_tmp_filename))
	(manual-section))
    (cond
     ;; Handle the nil case first so that no string function is ever
     ;; applied to a missing section.
     ((not section)
      (send-url-to-netscape (string-append manual-url "festival_toc.html")))
     ;; string-matches tests the whole string, so the pattern needs
     ;; leading and trailing .* to find a quote anywhere in SECTION.
     ;; NOTE(review): SECTION is interpolated into a double-quoted
     ;; shell argument below; other shell metacharacters are not
     ;; filtered here.
     ((string-matches section ".*\".*")
      (string-append "Invalid section reference containing quote: "
		     section "\n"))
     (t                             ;; find section in manual
      (get_url (string-append manual-url "festival_toc.html") tmpfile)
      ;; Pull the HREF target of the matching table-of-contents line
      ;; into TMPFILE.out
      (system
       (string-append
	"grep -i \"^<LI><A NAME.*" section "\" \"" tmpfile
	"\" | sed 's/^.*HREF=.//' | sed 's/.>.*$//' > \""
	tmpfile ".out\""))
      (set! manual-section (load (string-append tmpfile ".out") t))
      ;; The temporary files are finished with whether or not a
      ;; section was found, so always remove them.
      (delete-file tmpfile)
      (delete-file (string-append tmpfile ".out"))
      (cond
       ((not manual-section)
	(string-append "No section called: " section))
       (t
	(send-url-to-netscape (string-append manual-url (car manual-section)))
	"Sent manual reference url to netscape."))))))
173
+
174
+ (provide 'festdoc)
175
+
176
+
177
+
178
+
CosyVoice-ttsfrd/resource/festival/festival.el ADDED
@@ -0,0 +1,282 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ;;;
2
+ ;;; File: festival.el
3
+ ;;; Emacs Lisp
4
+ ;;;
5
+ ;;; Alan W Black CSTR ([email protected]) June 1996
6
+ ;;;
7
+ ;;; Provide an emacs mode for interfacing to the festival speech
8
+ ;;; synthesizer system
9
+ ;;;
10
+ ;;; I've looked at many examples from the emacs Lisp directory
11
+ ;;; copying relevant bits from here and there, so this can only
12
+ ;;; reasonably inherit the GNU licence (GPL)
13
+ ;;;
14
+ ;;; Setup:
15
+ ;;; In your .emacs add the following 2 lines to get a Say menu:
16
+ ;;;
17
+ ;;; (autoload 'say-minor-mode "festival" "Menu for using Festival." t)
18
+ ;;; (say-minor-mode t)
19
+ ;;; (setq auto-mode-alist
20
+ ;;; (append '(("\\.festivalrc$" . scheme-mode)) auto-mode-alist))
21
+ ;;;
22
+ ;;; The following gives you pretty colors in emacs-19 if you are into
23
+ ;;; such things
24
+ ;;; ;;; Some colors for scheme mode
25
+ ;;; (hilit-set-mode-patterns
26
+ ;;; '(scheme-mode)
27
+ ;;; '(
28
+ ;;; (";.*" nil comment)
29
+ ;;; (hilit-string-find ?\\ string)
30
+ ;;; ("^\\s *(def\\s +" "\\()\\|nil\\)" defun)
31
+ ;;; ("^\\s *(defvar\\s +\\S +" nil decl)
32
+ ;;; ("^\\s *(set\\s +\\S +" nil decl)
33
+ ;;; ("^\\s *(defconst\\s +\\S +" nil define)
34
+ ;;; ("^\\s *(\\(provide\\|require\\).*$" nil include)
35
+ ;;; ("(\\(let\\*?\\|cond\\|if\\|or\\|and\\|map\\(car\\|concat\\)\\|prog[n1*]?\\|while\\|lambda\\|function\\|Parameter\\|set\\([qf]\\|car\\|cdr\\)?\\|nconc\\|eval-when-compile\\|condition-case\\|unwind-protect\\|catch\\|throw\\|error\\)[ \t\n]" 1 keyword)))
36
+ ;;;
37
+ ;;;
38
+ ;;;--------------------------------------------------------------------
39
+ ;;; Copyright (C) Alan W Black 1996
40
+ ;;; This code is distributed in the hope that it will be useful,
41
+ ;;; but WITHOUT ANY WARRANTY. No author or distributor accepts
42
+ ;;; responsibility to anyone for the consequences of using this code
43
+ ;;; or for whether it serves any particular purpose or works at all,
44
+ ;;; unless explicitly stated in a written agreement.
45
+ ;;;
46
+ ;;; Everyone is granted permission to copy, modify and redistribute
47
+ ;;; this code, but only under the conditions described in the GNU
48
+ ;;; Emacs General Public License. A copy of this license is
49
+ ;;; distrubuted with GNU Emacs so you can know your rights and
50
+ ;;; responsibilities. It should be in a file named COPYING. Among
51
+ ;;; other things, the copyright notice and this notice must be
52
+ ;;; preserved on all copies.
53
+ ;;;--------------------------------------------------------------------
54
+ ;;;
55
+
56
(defvar festival-program-name "festival"
  "Name of the program started as the Festival sub-process.")

(defvar festival-process nil
  "The Festival sub-process, or nil when none has been started.")

;; NOTE(review): the file name is predictable and lives in the shared
;; /tmp directory.
(defvar festival-tmp-file
  (format "/tmp/festival-emacs-tmp-%s" (user-real-login-name))
  "Filename to save input for Festival.")
63
+
64
(defun festival-fast ()
  "Tell Festival to set Duration.Stretch to 0.8."
  (interactive)
  (festival-send-command (list 'Parameter.set ''Duration.Stretch 0.8)))

(defun festival-slow ()
  "Tell Festival to set Duration.Stretch to 1.2."
  (interactive)
  (festival-send-command (list 'Parameter.set ''Duration.Stretch 1.2)))

(defun festival-ndur ()
  "Tell Festival to set Duration.Stretch back to 1.0."
  (interactive)
  (festival-send-command (list 'Parameter.set ''Duration.Stretch 1.0)))

(defun festival-intro ()
  "Send the (intro) command to Festival."
  (interactive)
  (festival-send-command (list 'intro)))
76
+
77
;; Voice selection commands: each one sends a single voice_* call to
;; the Festival sub-process.

(defun festival-gsw ()
  "Send (voice_gsw_diphone) to Festival."
  (interactive)
  (festival-send-command (list 'voice_gsw_diphone)))

(defun festival-rab ()
  "Send (voice_rab_diphone) to Festival."
  (interactive)
  (festival-send-command (list 'voice_rab_diphone)))

(defun festival-ked ()
  "Send (voice_ked_diphone) to Festival."
  (interactive)
  (festival-send-command (list 'voice_ked_diphone)))

(defun festival-kal ()
  "Send (voice_kal_diphone) to Festival."
  (interactive)
  (festival-send-command (list 'voice_kal_diphone)))

(defun festival-don ()
  "Send (voice_don_diphone) to Festival."
  (interactive)
  (festival-send-command (list 'voice_don_diphone)))

(defun festival-welsh ()
  "Send (voice_welsh_hl) to Festival."
  (interactive)
  (festival-send-command (list 'voice_welsh_hl)))

(defun festival-spanish ()
  "Send (voice_spanish_el) to Festival."
  (interactive)
  (festival-send-command (list 'voice_spanish_el)))
98
+
99
(defun festival-say-string (string)
  "Have Festival say STRING.
STRING is sent to the Festival sub-process as the argument of a
SayText command; the process is started first if necessary."
  (interactive "sSay: ")
  (festival-start-process)
  ;; %S prints STRING as a quoted, escaped string literal.
  (process-send-string festival-process
		       (format "(SayText %S)\n" string)))
106
+
107
(defun festival-send-command (cmd)
  "Send CMD to the Festival sub-process, starting it if necessary.
CMD is a Lisp expression; it is printed with `%S' and followed by a
newline.  When called interactively the expression is read from the
minibuffer."
  ;; "x" reads an unevaluated Lisp expression; the former "px" spec
  ;; supplied the numeric prefix argument as CMD instead.
  (interactive "xFestival command: ")
  (festival-start-process)
  (process-send-string festival-process (format "%S\n" cmd)))
113
+
114
(defun festival-process-status ()
  "Show the status of the Festival sub-process in the echo area.
Shows NONE when no process has been recorded."
  (interactive)
  (message "Festival process status: %s"
	   (if festival-process
	       (process-status festival-process)
	     "NONE")))
120
+
121
(defun festival-start-process ()
  "Make sure a Festival sub-process is running.
Returns t when the recorded process is already running; otherwise
starts `festival-program-name' with its output in the *festival*
buffer and records the new process in `festival-process'."
  (interactive)
  (let ((process-connection-type t))
    (or (and festival-process
	     (eq (process-status festival-process) 'run))
	(progn
	  (message "Starting new synthesizer process...")
	  (sit-for 0)
	  (setq festival-process
		(start-process "festival" (get-buffer-create "*festival*")
			       festival-program-name))))))
135
+
136
(defun festival-kill-process ()
  "Kill the Festival sub-process, if any, and forget about it."
  (interactive)
  (and festival-process
       (kill-process festival-process))
  (setq festival-process nil)
  (message "Festival process killed"))
143
+
144
(defun festival-send-string (string)
  "Send STRING, unchanged, to the Festival process.
The process is started first if necessary.  When called
interactively STRING is read from the minibuffer."
  ;; A bare (interactive) cannot supply the required STRING argument,
  ;; so interactive invocation used to signal an error.
  (interactive "sSend to Festival: ")
  (festival-start-process)
  (process-send-string festival-process string))
149
+
150
(defun festival-say-region (reg-start reg-end)
  "Have Festival say the text between REG-START and REG-END.
The text is written to `festival-tmp-file' and Festival is then told
to say that file.  The major mode is *not* passed on as a text mode
name."
  (interactive "r")
  (write-region reg-start reg-end festival-tmp-file)
  (festival-send-command `(tts ,festival-tmp-file nil)))
157
+
158
(defun festival-say-buffer ()
  "Send the whole buffer to Festival for saying.
The buffer is saved in `festival-tmp-file' and Festival is told to say
that file.  The major mode name, without its \"-mode\" suffix, is
passed to Festival as the text mode; sgml-mode is passed as \"sable\".
A major mode whose name does not end in \"-mode\" passes no text mode."
  (interactive)
  (write-region (point-min) (point-max) festival-tmp-file)
  ;; Because there may be sgml-like sub-files mentioned
  ;; ensure festival tracks the buffer's default-directory
  (festival-send-command (list 'cd (expand-file-name default-directory)))
  ;; Match the suffix with a regexp rather than fixed offsets: `substring'
  ;; with -5 signals an error on names shorter than five characters.
  (let* ((mode (symbol-name major-mode))
	 (text-mode (and (string-match "-mode\\'" mode)
			 (substring mode 0 (match-beginning 0)))))
    (festival-send-command
     (list 'tts festival-tmp-file
	   (if (equal text-mode "sgml") "sable" text-mode)))))
175
+
176
+ ;;
177
+ ;; say-minor-mode provides a menu offering various speech synthesis commands
178
+ ;;
179
(defvar say-minor-mode nil
  "Non-nil when say minor mode is enabled.")

(defun say-minor-mode (arg)
  "Toggle say minor mode.
With no ARG the mode is toggled; otherwise it is turned on if and
only if the numeric value of ARG is positive."
  (interactive "P")
  (setq say-minor-mode
	(cond
	 ((null arg) (not say-minor-mode))
	 (t (> (prefix-numeric-value arg) 0))))
  (force-mode-line-update))
190
+
191
;; Sub-menu of duration settings.  The keymap is also installed as the
;; function definition of the symbol so that a menu item can refer to
;; the sub-menu by name.
(setq say-params-menu (make-sparse-keymap "Pitch/Duration"))
(fset 'say-params-menu say-params-menu)
(define-key say-params-menu [say-fast] '("Fast" . festival-fast))
(define-key say-params-menu [say-slow] '("Slow" . festival-slow))
(define-key say-params-menu [say-ndur] '("Normal Dur" . festival-ndur))

;; Sub-menu of voices, installed by name in the same way.
(setq say-lang-menu (make-sparse-keymap "Select language"))
(fset 'say-lang-menu say-lang-menu)
(define-key say-lang-menu [say-lang-spain1] '("Spanish el" . festival-spanish))
(define-key say-lang-menu [say-lang-welsh1] '("Welsh hl" . festival-welsh))
(define-key say-lang-menu [say-lang-eng5] '("English gsw" . festival-gsw))
(define-key say-lang-menu [say-lang-eng4] '("English don" . festival-don))
(define-key say-lang-menu [say-lang-eng3] '("English rab" . festival-rab))
(define-key say-lang-menu [say-lang-eng2] '("English ked" . festival-ked))
(define-key say-lang-menu [say-lang-eng1] '("English kal" . festival-kal))
;; Disabled entries; the commands they name are not defined in this file.
;(define-key say-params-menu [say-set-dur-stretch] 
;  '("Set Duration Stretch" . festival-set-dur-stretch))
;(define-key say-params-menu [say-high] '("High" . festival-high))
;(define-key say-params-menu [say-low] '("Low" . festival-low))
;(define-key say-params-menu [say-npit] '("Normal Pitch" . festival-npit))
;(define-key say-params-menu [say-set-pitch-stretch] 
;  '("Set Pitch Stretch" . festival-set-pitch-stretch))
213
+
214
;; Keymap active while say-minor-mode is on; it only contributes a
;; "Say" menu to the menu bar.
(setq say-minor-mode-map (make-sparse-keymap))
(setq say-menu (make-sparse-keymap "SAY"))
(define-key say-minor-mode-map [menu-bar SAY] (cons "Say" say-menu))
;; The [menu-bar SAY] prefix resolves to say-menu, so the entries are
;; added to that keymap directly.
(define-key say-menu [festival-intro] '("Festival Intro" . festival-intro))
(define-key say-menu [festival-process-status] '("Festival status" . festival-process-status))
(define-key say-menu [festival-kill-process] '("Kill Festival" . festival-kill-process))
(define-key say-menu [festival-start-process] '("(Re)start Festival" . festival-start-process))
;;(define-key say-menu [separator-process] '("--"))
;;(define-key say-menu [params] '("Pitch/Durations" . say-params-menu))
(define-key say-menu [separator-buffers] '("--"))
(define-key say-menu [festival-send-command] '("Festival eval command" . festival-send-command))
(define-key say-menu [say-lang-menu] '("Select language" . say-lang-menu))
(define-key say-menu [festival-say-buffer] '("Say buffer" . festival-say-buffer))
(define-key say-menu [festival-say-region] '("Say region" . festival-say-region))
228
+
229
+
230
;; Register the keymap for say-minor-mode.  Loading this file again
;; rebuilds say-minor-mode-map, so an existing entry is updated in
;; place instead of pushing a duplicate onto the alist.
(let ((entry (assq 'say-minor-mode minor-mode-map-alist)))
  (if entry
      (setcdr entry say-minor-mode-map)
    (setq minor-mode-map-alist
	  (cons
	   (cons 'say-minor-mode say-minor-mode-map)
	   minor-mode-map-alist))))

;; Register the (empty) mode line indicator once.
(or (assq 'say-minor-mode minor-mode-alist)
    (setq minor-mode-alist
	  (cons '(say-minor-mode "") minor-mode-alist)))
238
+
239
+ ;;;
240
+ ;;; A FESTIVAL inferior mode (copied from prolog.el)
241
+ ;;;
242
(defvar inferior-festival-mode-map nil
  "Keymap for `inferior-festival-mode'.
Created as a copy of `comint-mode-map' the first time the mode is
entered.")

(defun inferior-festival-mode ()
  "Major mode for interacting with an inferior FESTIVAL process.

The following commands are available:
\\{inferior-festival-mode-map}

Entry to this mode calls the value of `festival-mode-hook' with no arguments,
if that value is non-nil.  Likewise with the value of `comint-mode-hook'.
`festival-mode-hook' is called after `comint-mode-hook'.

You can send text to the inferior FESTIVAL from other buffers
using the commands `send-region', `send-string'

Return at end of buffer sends line as input.
Return not at end copies rest of line to end and sends it.
\\[comint-kill-input] and \\[backward-kill-word] are kill commands, imitating normal Unix input editing.
\\[comint-interrupt-subjob] interrupts the shell or its current subjob if any.
\\[comint-stop-subjob] stops. \\[comint-quit-subjob] sends quit signal."
  (interactive)
  (require 'comint)
  (comint-mode)
  (setq major-mode 'inferior-festival-mode
	mode-name "Inferior FESTIVAL"
	comint-prompt-regexp "^festival> ")
  (if inferior-festival-mode-map nil
    (setq inferior-festival-mode-map (copy-keymap comint-mode-map))
    ;; festival-mode-commands is not defined in this file; only call it
    ;; when something else has provided it, so that entering the mode
    ;; does not fail with a void-function error.
    (if (fboundp 'festival-mode-commands)
	(festival-mode-commands inferior-festival-mode-map)))
  ;; The variable name used here was previously misspelled
  ;; (inferior-festivalr-mode-map), which signalled void-variable.
  (use-local-map inferior-festival-mode-map)
  (run-hooks 'festival-mode-hook))
273
+
274
;;;###autoload
(defun run-festival ()
  "Run an inferior FESTIVAL process, input and output via buffer *festival*.
The buffer is selected and put into `inferior-festival-mode'."
  (interactive)
  (require 'comint)
  (let ((festival-buffer (make-comint "festival" festival-program-name)))
    (switch-to-buffer festival-buffer))
  (inferior-festival-mode))
281
+
282
+ (provide 'festival)
CosyVoice-ttsfrd/resource/festival/festival.scm ADDED
@@ -0,0 +1,633 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2
+ ;;; ;;
3
+ ;;; Centre for Speech Technology Research ;;
4
+ ;;; University of Edinburgh, UK ;;
5
+ ;;; Copyright (c) 1996,1997 ;;
6
+ ;;; All Rights Reserved. ;;
7
+ ;;; ;;
8
+ ;;; Permission is hereby granted, free of charge, to use and distribute ;;
9
+ ;;; this software and its documentation without restriction, including ;;
10
+ ;;; without limitation the rights to use, copy, modify, merge, publish, ;;
11
+ ;;; distribute, sublicense, and/or sell copies of this work, and to ;;
12
+ ;;; permit persons to whom this work is furnished to do so, subject to ;;
13
+ ;;; the following conditions: ;;
14
+ ;;; 1. The code must retain the above copyright notice, this list of ;;
15
+ ;;; conditions and the following disclaimer. ;;
16
+ ;;; 2. Any modifications must be clearly marked as such. ;;
17
+ ;;; 3. Original authors' names are not deleted. ;;
18
+ ;;; 4. The authors' names are not used to endorse or promote products ;;
19
+ ;;; derived from this software without specific prior written ;;
20
+ ;;; permission. ;;
21
+ ;;; ;;
22
+ ;;; THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK ;;
23
+ ;;; DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ;;
24
+ ;;; ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT ;;
25
+ ;;; SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE ;;
26
+ ;;; FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES ;;
27
+ ;;; WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN ;;
28
+ ;;; AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ;;
29
+ ;;; ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF ;;
30
+ ;;; THIS SOFTWARE. ;;
31
+ ;;; ;;
32
+ ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
33
+ ;;; General Festival Scheme specific functions
34
+ ;;; Including definitions of various standard variables.
35
+
36
;; Placeholder value; replaced automatically on start-up.
(defvar festival_version "unknown"
  "festival_version
A string holding the version number of the running system.")

;; Placeholder value; replaced automatically on start-up.
(defvar festival_version_number '(x x x)
  "festival_version_number
A list of the major, minor and subminor version numbers of the
running system.  e.g. (1 0 12).")
46
+
47
(define (apply_method method utt)
  "(apply_method METHOD UTT)
Look up the value of the parameter METHOD and, when it is a bound
symbol or a function, apply it to UTT.  Returns nil otherwise."
  (let ((fn (Parameter.get method)))
    (cond
     ;; Parameter not set: arguably an error, but it is tolerated
     ((null fn) nil)
     ;; A symbol naming the function to call
     ((and (symbol? fn) (symbol-bound? fn))
      (apply (symbol-value fn) (list utt)))
     ;; The function itself
     ((member (typeof fn) '(subr closure))
      (apply fn (list utt)))
     ;; Anything else is probably an error too, but is ignored
     (t nil))))
60
+
61
(define (require_module l)
  "(require_module l)
Check that certain compile-time modules are included in this installation.
l may be a single atom or a list of atoms.  Each item in l must appear in
*modules*, otherwise an error is thrown.  Returns t."
  (cond
   ;; A list: check every member in turn
   ((consp l)
    (mapcar require_module l))
   ;; A single name that is missing
   ((not (member_string l *modules*))
    (error (format nil "module %s required, but not compiled in this installation\n" l))))
  t)
71
+
72
+ ;;; Feature Function Functions
73
(define (utt.features utt relname func_list)
  "(utt.features UTT RELATIONNAME FUNCLIST)
For each item of RELATIONNAME in UTT return the list of values of the
features named in FUNCLIST, in the same order.
[see Features]"
  (mapcar
   (lambda (item)
     (mapcar (lambda (featname) (item.feat item featname))
	     func_list))
   (utt.relation.items utt relname)))
81
+
82
(define (utt.type utt)
  "(utt.type UTT)
Returns the type of UTT, the value of its type feature, as a symbol."
  (let ((type_name (utt.feat utt 'type)))
    (intern type_name)))
86
+
87
(define (utt.save.segs utt filename)
  "(utt.save.segs UTT FILE)
Write the end time and name of each segment of UTT to FILE in xlabel
format.  Returns UTT."
  (let ((port (fopen filename "w")))
    (format port "#\n")     ;; end of the (empty) xlabel header
    (mapcar
     (lambda (seg)
       ;; seg is (segment_end name)
       (format port "%2.4f 100 %s\n" (car seg) (cadr seg)))
     (utt.features utt 'Segment '(segment_end name)))
    (fclose port)
    utt))
98
+
99
(define (utt.save.words utt filename)
  "(utt.save.words UTT FILE)
Write the end time and name of each word of UTT to FILE in xlabel
format.  Returns UTT."
  (let ((port (fopen filename "w")))
    (format port "#\n")     ;; end of the (empty) xlabel header
    (mapcar
     (lambda (word)
       ;; word is (word_end name)
       (format port "%2.4f 100 %s\n" (car word) (cadr word)))
     (utt.features utt 'Word '(word_end name)))
    (fclose port)
    utt))
110
+
111
(define (utt.resynth labfile f0file)
  "(utt.resynth LABFILE F0FILE)
Resynthesize an utterance from a label file and F0 file (in any format
supported by the Speech Tool Library).  The labels are loaded as the
Segment relation and the F0 track is attached to a new f0 relation;
the utterance is then synthesized, played and returned."
  (let ((u (Utterance SegF0)))          ; need some u to start with
    (utt.relation.load u 'Segment labfile)
    (utt.relation.create u 'f0)
    (let* ((f0 (track.load f0file))
	   (f0_item (utt.relation.append u 'f0)))
      (item.set_feat f0_item "name" "f0")
      (item.set_feat f0_item "f0" f0))

    ;; emulabel may have flipped pau to H#
    (mapcar
     (lambda (seg)
       (cond
	((string-matches (item.name seg) "[hH]#")
	 (item.set_feat seg "name" "pau"))
	;; any other name starting with # loses that prefix
	((string-matches (item.name seg) "#.*")
	 (item.set_feat seg "name" (string-after (item.name seg) "#")))))
     (utt.relation.items u 'Segment))

    (Wave_Synth u)
    (utt.play u)
    u))
138
+
139
(define (utt.relation.present utt relation)
  "(utt.relation.present UTT RELATIONNAME)
Returns t if UTT contains a relation called RELATIONNAME, nil otherwise."
  (cond
   ((member_string relation (utt.relationnames utt)) t)
   (t nil)))
145
+
146
(define (utt.relation.leafs utt relation)
  "(utt.relation.leafs UTT RELATIONNAME)
Returns a list of all the leafs in this relation, that is the items
with nothing below them, in relation order."
  (let ((found nil))
    (mapcar
     (lambda (item)
       (if (not (item.down (item.relation item relation)))
	   (set! found (cons item found))))
     (utt.relation.items utt relation))
    ;; items were collected most recent first
    (reverse found)))
156
+
157
(define (utt.relation.first utt relation)
  "(utt.relation.first UTT RELATIONNAME)
Returns the first item in this relation."
  (utt.relation utt relation))

(define (utt.relation.last utt relation)
  "(utt.relation.last UTT RELATIONNAME)
Returns the last item in this relation, found by following next links
from the first item."
  (let ((cur (utt.relation.first utt relation)))
    (while (item.next cur)
      (set! cur (item.next cur)))
    cur))
169
+
170
(define (item.feat.present item feat)
  "(item.feat.present item feat)
nil if item is nil or feat does not exist in this item, non-nil
otherwise."
  (if item
      (assoc_string feat (item.features item))
      item))
174
+
175
(define (item.relation.append_daughter parent relname daughter)
  "(item.relation.append_daughter parent relname daughter)
Add daughter to parent as a new daughter in relname."
  (let ((parent_in_rel (item.relation parent relname)))
    (item.append_daughter parent_in_rel daughter)))
179
+
180
(define (item.relation.insert si relname newsi direction)
  "(item.relation.insert si relname newsi direction)
Insert newsi in relation relname with respect to direction.  If
direction is omitted, after is assumed; valid directions are after,
before, above and below.  Note you should use
item.relation.append_daughter for tree adjoining.  newsi may be
an item itself or a LISP description of one."
  (let ((si_in_rel (item.relation si relname)))
    (item.insert si_in_rel newsi direction)))
191
+
192
(define (item.relation.daughters parent relname)
  "(item.relation.daughters parent relname)
Return a list of all daughters of parent in relname, first daughter
first."
  (let ((cur (item.daughter1 (item.relation parent relname)))
	(collected nil))
    ;; Walk along the daughters through their next links
    (while cur
      (set! collected (cons cur collected))
      (set! cur (item.next cur)))
    (reverse collected)))
201
+
202
(define (item.daughters p)
  "(item.daughters parent)
Return a list of all daughters of parent in the relation parent is
currently viewed in."
  (let ((relname (item.relation.name p)))
    (item.relation.daughters p relname)))
206
+
207
;; The following accessors view the item in the named relation and then
;; apply the corresponding single-relation accessor.

(define (item.relation.parent si relname)
  "(item.relation.parent item relname)
Return the parent of this item in this relation."
  (let ((in_rel (item.relation si relname)))
    (item.parent in_rel)))

(define (item.relation.daughter1 si relname)
  "(item.relation.daughter1 item relname)
Return the first daughter of this item in this relation."
  (let ((in_rel (item.relation si relname)))
    (item.daughter1 in_rel)))

(define (item.relation.daughter2 si relname)
  "(item.relation.daughter2 item relname)
Return the second daughter of this item in this relation."
  (let ((in_rel (item.relation si relname)))
    (item.daughter2 in_rel)))

(define (item.relation.daughtern si relname)
  "(item.relation.daughtern item relname)
Return the final daughter of this item in this relation."
  (let ((in_rel (item.relation si relname)))
    (item.daughtern in_rel)))

(define (item.relation.next si relname)
  "(item.relation.next item relname)
Return the next item in this relation."
  (let ((in_rel (item.relation si relname)))
    (item.next in_rel)))

(define (item.relation.prev si relname)
  "(item.relation.prev item relname)
Return the previous item in this relation."
  (let ((in_rel (item.relation si relname)))
    (item.prev in_rel)))
236
+
237
(define (item.relation.first si relname)
  "(item.relation.first item relname)
Return the most previous item from this item in this relation, found
by following prev links until there are none."
  (let ((cur (item.relation si relname)))
    (while (item.prev cur)
      (set! cur (item.prev cur)))
    cur))
244
+
245
(define (item.leafs si)
  "(item.leafs item)
Return a list of the leafs of this item in its current relation, from
its first leaf to its last leaf inclusive."
  (let ((ls nil)
	(pl (item.first_leaf si))
	;; the leaf following the last one marks where to stop
	(ll (item.next_leaf (item.last_leaf si))))
    (while (and pl (not (equal? pl ll)))
      (set! ls (cons pl ls))
      (set! pl (item.next_leaf pl)))
    (reverse ls)))
255
+
256
+ (define (item.relation.leafs si relname)
257
+ "(item.relation.leafs ITEM RELNAME)
258
+ View ITEM in relation RELNAME and list the leafs found beneath it there."
259
+ (let ((in_rel (item.relation si relname))) (item.leafs in_rel)))
260
+
261
+ (define (item.root s)
262
+ "(item.root ITEM)
263
+ Climb parent links from ITEM and return the item that has no parent."
264
+ (let ((up (item.parent s)))
265
+ (if up
266
+ (item.root up)
267
+ s)))
268
+
269
+ (define (item.parent_to s relname)
270
+ "(item.parent_to ITEM RELNAME)
271
+ Search upwards from ITEM through its parents for the nearest item that
272
+ also belongs to RELNAME. ITEM counts as its own ancestor, so it is
273
+ returned when it is in RELNAME. The result is given in relation
274
+ RELNAME, or nil when no such ancestor exists."
275
+ (if (null s)
276
+ s
277
+ (if (member_string relname (item.relations s))
278
+ (item.relation s relname)
279
+ (item.parent_to (item.parent s) relname))))
280
+
281
+ (define (item.daughter1_to s relname)
282
+ "(item.daughter1_to ITEM RELNAME)
283
+ Descend through daughter1 links from ITEM until reaching an item that
284
+ is also in RELNAME; if ITEM is itself in RELNAME it is returned.
285
+ The result is given in relation RELNAME, or nil if the descent ends
286
+ without finding anything in RELNAME."
287
+ (if (null s)
288
+ s
289
+ (if (member_string relname (item.relations s)) (item.relation s relname)
290
+ (item.daughter1_to (item.daughter1 s) relname))))
291
+
292
+ (define (item.daughtern_to s relname)
293
+ "(item.daughtern_to s relname)
294
+ Follow daughtern links of s in its current relation until an item
295
+ is found that is also in relname, if s is in relname it is returned.
296
+ The return item is returned in relation relname, or nil if there is
297
+ nothing in relname."
298
+ (cond
299
+ ((null s) s) ; ran out of daughters: nothing found
300
+ ((member_string relname (item.relations s)) (item.relation s relname))
301
+ (t (item.daughtern_to (item.daughtern s) relname))))
302
+
303
+ (define (item.name s)
304
+ "(item.name ITEM)
305
+ Fetch the \"name\" feature of ITEM. [see Accessing an utterance]"
306
+ (let ((feature "name")) (item.feat s feature)))
307
+
308
+ (define (utt.wave utt)
309
+ "(utt.wave UTT)
310
+ Fetch the waveform of UTT, i.e. the wave feature at R:Wave.first.wave."
311
+ (let ((wave_item (utt.relation.first utt "Wave"))) (item.feat wave_item "wave")))
312
+
313
+ (define (utt.wave.rescale . args)
314
+ "(utt.wave.rescale UTT FACTOR NORMALIZE)
315
+ Modify the gain of the waveform in UTT by FACTOR. If NORMALIZE is
316
+ specified and non-nil the waveform is maximized first. Returns UTT."
317
+ (wave.rescale (utt.wave (nth 0 args)) (nth 1 args) (nth 2 args))
318
+ (nth 0 args))
319
+
320
+ (define (utt.wave.resample utt rate)
321
+ "(utt.wave.resample UTT RATE)
322
+ Resample waveform in UTT to RATE (if it is already at that rate it remains
323
+ unchanged). Returns UTT."
324
+ (wave.resample (utt.wave utt) rate)
325
+ utt)
326
+
327
+ (define (utt.import.wave . args)
328
+ "(utt.import.wave UTT FILENAME APPEND)
329
+ Read the waveform stored in FILENAME into R:Wave.first.wave of UTT. With a
330
+ non-nil APPEND it is appended to the waveform UTT already has. Returns UTT."
331
+ (let ((utt (nth 0 args))
332
+ (wavefile (nth 1 args))
333
+ (append_p (nth 2 args))) ; named so the builtin append is not shadowed
334
+ (if (and append_p (member 'Wave (utt.relationnames utt)))
335
+ (wave.append (utt.wave utt) (wave.load wavefile))
336
+ (begin
337
+ (utt.relation.create utt 'Wave)
338
+ (item.set_feat
339
+ (utt.relation.append utt 'Wave)
340
+ "wave"
341
+ (wave.load wavefile))))
342
+ utt))
343
+
344
+ (define (utt.save.wave . args)
345
+ "(utt.save.wave UTT FILENAME FILETYPE)
346
+ Write the waveform of UTT to FILENAME, as FILETYPE when that is given and
347
+ otherwise per the global parameter Wavefiletype. Returns UTT."
348
+ (let ((utt (nth 0 args)))
349
+ (wave.save (utt.wave utt)
350
+ (nth 1 args)
351
+ (nth 2 args))
352
+ utt))
353
+
354
+ (define (utt.play utt)
355
+ "(utt.play UTT)
356
+ Play UTT's waveform by the current audio method, then return UTT."
357
+ (let ((wav (utt.wave utt))) (wave.play wav))
358
+ utt)
359
+
360
+ (define (utt.save.track utt filename relation feature)
361
+ "(utt.save.track utt filename relation feature)
362
+ DEPRECATED use track.save instead."
363
+ (format stderr "utt.save.track: DEPRICATED use track.save instead\n")
364
+ (track.save
365
+ (item.feat
366
+ (utt.relation.first utt relation)
367
+ feature)
368
+ filename)
369
+ utt)
370
+
371
+ (define (utt.import.track utt filename relation fname)
372
+ "(utt.import.track UTT FILENAME RELATION FEATURE_NAME)
373
+ Load the track in FILENAME into UTT as R:RELATION.first.FEATURE_NAME.
374
+ RELATION is deleted first if it already exists. (This is a legacy
375
+ function; you may want to call track.load directly instead.)"
376
+ (utt.relation.create utt relation)
377
+ (let ((holder (utt.relation.append utt relation)))
378
+ (item.set_feat holder
379
+ fname
380
+ (track.load filename)))
381
+ utt)
382
+
383
+ (define (wagon_predict item tree)
384
+ "(wagon_predict ITEM TREE)
385
+ Run the CART TREE on ITEM and return only the prediction, which is the
386
+ last element of the result, instead of the whole probability distribution."
387
+ (let ((outcome (wagon item tree))) (car (last outcome))))
388
+
389
+ (define (phone_is_silence phone)
390
+ "(phone_is_silence PHONE) Tests with member_string whether PHONE is in the silences entry of the current phone set description."
391
+ (member_string phone
392
+ (cadr (car (PhoneSet.description '(silences))))))
393
+
394
+ (define (phone_feature phone feat)
395
+ "(phone_feature PHONE FEAT)
396
+ Look up the value of feature FEAT for PHONE in the current phone set.
397
+ Returns 0 when the phone set has no feature called FEAT."
398
+ (let ((desc (PhoneSet.description)))
399
+ (let ((names (cadr (assoc 'features desc)))
400
+ (vals (cdr (assoc (intern phone) (cadr (assoc 'phones desc))))))
401
+ (while (and names (not (string-equal feat (car (car names))))) ; advance both lists in step
402
+ (set! vals (cdr vals))
403
+ (set! names (cdr names)))
404
+ (if names
405
+ (car vals)
406
+ 0))))
407
+
408
+ (defvar server_max_clients 10
409
+ "server_max_clients
410
+ In server mode, the maximum number of clients supported at any one
411
+ time. When more than this number of clients attach simultaneously
412
+ the last ones are denied access. Default value is 10.
413
+ [see Server/client API]")
414
+
415
+ (defvar server_port 1314
416
+ "server_port
417
+ In server mode the inet port number the server will wait for connects
418
+ on. The default value is 1314. [see Server/client API]")
419
+
420
+ (defvar server_log_file t
421
+ "server_log_file
422
+ If set to t server log information is printed to standard output
423
+ of the server process. If set to nil no output is given. If set
424
+ to anything else the value is used as the name of file to which
425
+ server log information is appended. Note this value is checked at
426
+ server start time, there is no way a client may change this.
427
+ [see Server/client API]")
428
+
429
+ (defvar server_passwd nil
430
+ "server_passwd
431
+ If non-nil clients must send this passwd to the server followed by
432
+ a newline before they can get a connection. It would be normal
433
+ to set this for the particular server task.
434
+ [see Server/client API]")
435
+
436
+ (defvar server_access_list '(localhost)
437
+ "server_access_list
438
+ If non-nil this is the exhaustive list of machines and domains
439
+ from which clients may access the server. This is a list of REGEXs
440
+ that client host must match. Remember to add the backslashes before
441
+ the dots. [see Server/client API]")
442
+
443
+ (defvar server_deny_list nil
444
+ "server_deny_list
445
+ If non-nil this is a list of machines which are to be denied access
446
+ to the server absolutely, irrespective of any other control features.
447
+ The list is a list of REGEXs that are used to match the client hostname.
448
+ This list is checked first, then server_access_list, then passwd.
449
+ [see Server/client API]")
450
+
451
+ (define (def_feature_docstring fname fdoc)
452
+ "(def_feature_docstring FEATURENAME FEATUREDOC)
453
+ Some features are plain values on an item's feature list and have no
454
+ accompanying feature function that could carry documentation. This
455
+ function allows such features to have a documentation string anyway,
456
+ recorded in ff_docstrings. Returns t."
457
+ (let ((known (assoc fname ff_docstrings)))
458
+ (if known
459
+ ;; replace what's already there
460
+ (set-cdr! known fdoc)
461
+ ;; first documentation for this feature: add a new entry at the front
462
+ (set! ff_docstrings (cons (cons fname fdoc) ff_docstrings)))
463
+ t))
464
+
465
+ (define (linear_regression item model)
466
+ "(linear_regression ITEM MODEL)
467
+ Evaluate the linear regression MODEL on ITEM and return the sum. MODEL is a
468
+ list of (FEATURE WEIGHT [MAPLIST]) terms, optionally led by an Intercept
469
+ term, e.g. ((Intercept 100) (tobi_accent 10 (H* !H*)))."
470
+ (let ((has_icept (equal? 'Intercept (car (car model)))))
471
+ (let ((intercept (if has_icept (car (cdr (car model))) 0))
472
+ (terms (if has_icept
473
+ (cdr model) model)))
474
+ (apply +
475
+ (cons intercept
476
+ (mapcar
477
+ (lambda (term)
478
+ (let ((val (item.feat item (car term))) (weight (car (cdr term))) (maplist (car (cdr (cdr term)))))
479
+ (if maplist
480
+ (if (member_string val maplist) ; mapped term: WEIGHT if the value is listed, else 0
481
+ weight
482
+ 0)
483
+ (* (parse-number val) weight)))) ; plain term: numeric value times WEIGHT
484
+ terms))))))
485
+
486
+ (defvar help
487
+ "The Festival Speech Synthesizer System: Help
488
+
489
+ Getting Help
490
+ (doc '<SYMBOL>) displays help on <SYMBOL>
491
+ (manual nil) displays manual in local netscape
492
+ C-c return to top level
493
+ C-d or (quit) Exit Festival
494
+ (If compiled with editline)
495
+ M-h displays help on current symbol
496
+ M-s speaks help on current symbol
497
+ M-m displays relevant manula page in local netscape
498
+ TAB Command, symbol and filename completion
499
+ C-p or up-arrow Previous command
500
+ C-b or left-arrow Move back one character
501
+ C-f or right-arrow
502
+ Move forward one character
503
+ Normal Emacs commands work for editing command line
504
+
505
+ Doing stuff
506
+ (SayText TEXT) Synthesize text, text should be surrounded by
507
+ double quotes
508
+ (tts FILENAME nil) Say contexts of file, FILENAME should be
509
+ surrounded by double quotes
510
+ (voice_rab_diphone) Select voice (Britsh Male)
511
+ (voice_kal_diphone) Select voice (American Male)
512
+ ")
513
+
514
+ (define (festival_warranty)
515
+ "(festival_warranty)
516
+ Display Festival's copyright and warranty. [see Copying]"
517
+ (format t
518
+ (string-append
519
+ " The Festival Speech Synthesis System: "
520
+ festival_version
521
+ "
522
+ Centre for Speech Technology Research
523
+ University of Edinburgh, UK
524
+ Copyright (c) 1996-2014
525
+ All Rights Reserved.
526
+
527
+ Permission is hereby granted, free of charge, to use and distribute
528
+ this software and its documentation without restriction, including
529
+ without limitation the rights to use, copy, modify, merge, publish,
530
+ distribute, sublicense, and/or sell copies of this work, and to
531
+ permit persons to whom this work is furnished to do so, subject to
532
+ the following conditions:
533
+ 1. The code must retain the above copyright notice, this list of
534
+ conditions and the following disclaimer.
535
+ 2. Any modifications must be clearly marked as such.
536
+ 3. Original authors' names are not deleted.
537
+ 4. The authors' names are not used to endorse or promote products
538
+ derived from this software without specific prior written
539
+ permission.
540
+
541
+ THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK
542
+ DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING
543
+ ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT
544
+ SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE
545
+ FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
546
+ WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN
547
+ AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
548
+ ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
549
+ THIS SOFTWARE.
550
+ ")))
551
+
552
+ (define (intro)
553
+ "(intro)
554
+ Speak the introduction to the Festival Speech Synthesis System."
555
+ (let ((script (path-append libdir "../examples/intro.text"))) (tts script nil)))
556
+
557
+ (define (intro-spanish)
558
+ "(intro-spanish)
559
+ Speak the Spanish introduction to the Festival Speech Synthesis System.
560
+ A Spanish voice must already be selected before calling this."
561
+ (let ((script (path-append libdir "../examples/spintro.text"))) (tts script nil)))
562
+
563
+ (define (na_play FILENAME)
564
+ "(na_play FILENAME)
565
+ Play given wavefile"
566
+ (utt.play (utt.synth (eval (list 'Utterance 'Wave FILENAME)))))
567
+
568
+ ;;; Some autoload commands
569
+ (autoload manual-sym "festdoc" "Show appropriate manual section for symbol.")
570
+ (autoload manual "festdoc" "Show manual section.")
571
+
572
+ (autoload display "display" "Graphically display utterance.")
573
+
574
+ (autoload festtest "festtest" "Run tests of Festival.")
575
+
576
+ (defvar diphone_module_hooks nil
577
+ "diphone_module_hooks
578
+ A function or list of functions that will be applied to the utterance
579
+ at the start of the diphone module. It can be used to map segment
580
+ names to those that will be used by the diphone database itself.
581
+ Typical use specifies _ and $ for consonant clusters and syllable
582
+ boundaries, mapping to dark ll's etc. Reduction and tap type
583
+ phenomena should probably be done by post lexical rules though the
584
+ distinction is not a clear one.")
585
+
586
+ (def_feature_docstring
587
+ 'Segment.diphone_phone_name
588
+ "Segment.diphone_phone_name
589
+ This is produced by the diphone module to contain the desired phone
590
+ name for the desired diphone. This adds things like _ if part of
591
+ a consonant or $ to denote syllable boundaries. These are generated
592
+ on a per voice basis by function(s) specified by diphone_module_hooks.
593
+ Identification of dark ll's etc. may also be included. Note this is not
594
+ necessarily the name of the diphone selected as if it is not found
595
+ some of these characters will be removed and fall back values will be
596
+ used.")
597
+
598
+ (def_feature_docstring
599
+ 'Syllable.stress
600
+ "Syllable.stress
601
+ The lexical stress of the syllable as specified from the lexicon entry
602
+ corresponding to the word related to this syllable.")
603
+
604
+ ;;;
605
+ ;;; I tried some tests on the resulting speed both runtime and loadtime
606
+ ;;; but compiled files don't seem to make any significant difference
607
+ ;;;
608
+ (define (compile_library)
609
+ "(compile_library)
610
+ Compile a fixed list of library scheme files, announcing each one. Returns t."
611
+ (mapcar
612
+ (lambda (scm)
613
+ (format t "compile ... %s\n" scm)
614
+ (compile-file (string-before scm ".scm")))
615
+ '(
616
+ "synthesis.scm" "siod.scm" "init.scm" "lexicons.scm"
617
+ "festival.scm" "gsw_diphone.scm" "intonation.scm" "duration.scm"
618
+ "pos.scm" "phrase.scm" "don_diphone.scm" "rab_diphone.scm"
619
+ "voices.scm" "tts.scm" "festdoc.scm" "languages.scm" "token.scm"
620
+ "mbrola.scm" "display.scm" "postlex.scm" "tokenpos.scm"
621
+ "festtest.scm" "cslush.scm" "ducs_cluster.scm" "sucs.scm"
622
+ "web.scm" "cart_aux.scm"
623
+ "lts_nrl.scm" "lts_nrl_us.scm" "email-mode.scm"
624
+ "mrpa_phones.scm" "radio_phones.scm" "holmes_phones.scm"
625
+ "mrpa_durs.scm" "klatt_durs.scm" "gswdurtreeZ.scm"
626
+ "tobi.scm" "f2bf0lr.scm"))
627
+ t)
628
+
629
+ ;;; For mlsa resynthesizer
630
+ (defvar mlsa_alpha_param 0.42)
631
+ (defvar mlsa_beta_param 0.0)
632
+
633
+ (provide 'festival)
CosyVoice-ttsfrd/resource/festival/festtest.scm ADDED
@@ -0,0 +1,72 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2
+ ;;; ;;
3
+ ;;; Centre for Speech Technology Research ;;
4
+ ;;; University of Edinburgh, UK ;;
5
+ ;;; Copyright (c) 1996,1997 ;;
6
+ ;;; All Rights Reserved. ;;
7
+ ;;; ;;
8
+ ;;; Permission is hereby granted, free of charge, to use and distribute ;;
9
+ ;;; this software and its documentation without restriction, including ;;
10
+ ;;; without limitation the rights to use, copy, modify, merge, publish, ;;
11
+ ;;; distribute, sublicense, and/or sell copies of this work, and to ;;
12
+ ;;; permit persons to whom this work is furnished to do so, subject to ;;
13
+ ;;; the following conditions: ;;
14
+ ;;; 1. The code must retain the above copyright notice, this list of ;;
15
+ ;;; conditions and the following disclaimer. ;;
16
+ ;;; 2. Any modifications must be clearly marked as such. ;;
17
+ ;;; 3. Original authors' names are not deleted. ;;
18
+ ;;; 4. The authors' names are not used to endorse or promote products ;;
19
+ ;;; derived from this software without specific prior written ;;
20
+ ;;; permission. ;;
21
+ ;;; ;;
22
+ ;;; THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK ;;
23
+ ;;; DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ;;
24
+ ;;; ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT ;;
25
+ ;;; SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE ;;
26
+ ;;; FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES ;;
27
+ ;;; WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN ;;
28
+ ;;; AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ;;
29
+ ;;; ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF ;;
30
+ ;;; THIS SOFTWARE. ;;
31
+ ;;; ;;
32
+ ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
33
+ ;;;
34
+ ;;; Some basic functions used in tests for Festival
35
+ ;;;
36
+
37
+ (define (test_words text)
38
+ "(test_words TEXT)
39
+ Print TEXT, synthesize it (leaving the utterance in the global utt1),
40
+ then output the words in it. Returns t."
41
+ (format t "Word test: %s\n " text)
42
+ (set! utt1 (utt.synth (eval (list 'Utterance 'Text text))))
43
+ (mapcar (lambda (names) (format t "%s " (car names)))
44
+ (utt.features utt1 'Word '(name)))
45
+ (format t "\n")
46
+ t)
47
+
48
+ (define (test_segments text)
49
+ "(test_segments TEXT)
50
+ prints TEXT, Synthesizes TEXT and outputs the segments in it. Returns t."
51
+ (format t "Segment test: %s\n " text)
52
+ (set! utt1 (utt.synth (eval (list 'Utterance 'Text text))))
53
+ (mapcar
54
+ (lambda (seg) (format t "%s " (car seg)))
55
+ (utt.features utt1 'Segment '(name)))
56
+ (format t "\n")
57
+ t) ; return t like test_words and test_phrases do
58
+
59
+ (define (test_phrases text)
60
+ "(test_phrases TEXT)
61
+ Print TEXT, synthesize it into the global utt1, then output the words and phrase breaks. Returns t."
62
+ (format t "Phrase test: %s \n " text)
63
+ (set! utt1 (utt.synth (eval (list 'Utterance 'Text text))))
64
+ (mapcar
65
+ (lambda (node)
66
+ (mapcar (lambda (kid) (format t "%s " (car (car kid)))) (cdr node))
67
+ (format t "%s\n " (car (car node))))
68
+ (utt.relation_tree utt1 'Phrase))
69
+ (format t "\n")
70
+ t)
71
+
72
+ (provide 'festtest)
CosyVoice-ttsfrd/resource/festival/fringe.scm ADDED
@@ -0,0 +1,108 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+
3
+
4
+
5
+ ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6
+ ;;; DO NOT EDIT THIS FILE ON PAIN OF MORE PAIN.
7
+ ;;;
8
+ ;;; The master copy of this file is in ../../speech_tools/lib/siod/fringe.scm
9
+ ;;; and is copied here at build time.
10
+ ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
11
+
12
+
13
+
14
+
15
+
16
+
17
+
18
+
19
+
20
+
21
+
22
+
23
+ ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
24
+ ;;; ;;
25
+ ;;; Centre for Speech Technology Research ;;
26
+ ;;; University of Edinburgh, UK ;;
27
+ ;;; Copyright (c) 1996,1997 ;;
28
+ ;;; All Rights Reserved. ;;
29
+ ;;; ;;
30
+ ;;; Permission is hereby granted, free of charge, to use and distribute ;;
31
+ ;;; this software and its documentation without restriction, including ;;
32
+ ;;; without limitation the rights to use, copy, modify, merge, publish, ;;
33
+ ;;; distribute, sublicense, and/or sell copies of this work, and to ;;
34
+ ;;; permit persons to whom this work is furnished to do so, subject to ;;
35
+ ;;; the following conditions: ;;
36
+ ;;; 1. The code must retain the above copyright notice, this list of ;;
37
+ ;;; conditions and the following disclaimer. ;;
38
+ ;;; 2. Any modifications must be clearly marked as such. ;;
39
+ ;;; 3. Original authors' names are not deleted. ;;
40
+ ;;; 4. The authors' names are not used to endorse or promote products ;;
41
+ ;;; derived from this software without specific prior written ;;
42
+ ;;; permission. ;;
43
+ ;;; ;;
44
+ ;;; THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK ;;
45
+ ;;; DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ;;
46
+ ;;; ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT ;;
47
+ ;;; SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE ;;
48
+ ;;; FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES ;;
49
+ ;;; WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN ;;
50
+ ;;; AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ;;
51
+ ;;; ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF ;;
52
+ ;;; THIS SOFTWARE. ;;
53
+ ;;; ;;
54
+ ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
55
+ ;;;
56
+ ;;; Talking to fringe.
57
+
58
+ (defvar fringe_verbose nil
59
+ "fringe_verbose
60
+ If this is set true, all subsequent fringe connections will
61
+ print a trace of what they are doing.")
62
+
63
+ ;;; Aliases which are better suited to command line use.
64
+
65
+ (defvar fringe_name "fringe"
66
+ "fringe_name
67
+ The name most recently passed to \[fringe_setup\].")
68
+
69
+ (defvar fringe_connection nil
70
+ "fringe_connection
71
+ A connection to fringe, used by the command line fringe functions.")
72
+
73
+ (define (fringe_setup &opt name)
74
+ "(fringe_setup &opt name)
75
+ Connect to the fringe server called NAME (default: fringe_name) and
76
+ remember both the connection and the name for later commands."
77
+ (fringe_read_server_table)
78
+ (if (not name) (set! name fringe_name))
79
+ (set! fringe_connection (fringe_server name)) ; was hard-coded to "fringe", ignoring NAME
80
+ (set! fringe_name name)
81
+ )
82
+
83
+ (define (fringe command)
84
+ "(fringe COMMAND)
85
+ Send COMMAND to the fringe server \[fringe_connection\].
86
+ Intended for command line use; in scripts use (fringe_command_string ...). "
87
+ (if (not fringe_connection) (fringe_setup)) ; connect on first use
88
+ (let ((val (fringe_command_string fringe_connection command)))
89
+ (if (or (null val) (consp val)) ; empty or list results are reported as nil
90
+ nil
91
+ val)
92
+ )
93
+ )
94
+
95
+ (define (fringel package operation args)
96
+ "(fringel PACKAGE OPERATION ARGS)
97
+ Send a command to the fringe server \[fringe_connection\].
98
+ Intended for command line use; in scripts use (fringe_command ...). "
99
+
100
+ (if (not fringe_connection) (fringe_setup)) ; connect on first use
101
+ (let ((val (fringe_command fringe_connection package operation args)))
102
+ (if (or (null val) (consp val)) ; empty or list results are reported as nil
103
+ nil
104
+ val)
105
+ )
106
+ )
106
+ )
107
+
108
+ (provide 'fringe)
CosyVoice-ttsfrd/resource/festival/gswdurtreeZ.scm ADDED
@@ -0,0 +1,947 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2
+ ;;; ;;
3
+ ;;; Centre for Speech Technology Research ;;
4
+ ;;; University of Edinburgh, UK ;;
5
+ ;;; Copyright (c) 1996,1997 ;;
6
+ ;;; All Rights Reserved. ;;
7
+ ;;; ;;
8
+ ;;; Permission is hereby granted, free of charge, to use and distribute ;;
9
+ ;;; this software and its documentation without restriction, including ;;
10
+ ;;; without limitation the rights to use, copy, modify, merge, publish, ;;
11
+ ;;; distribute, sublicense, and/or sell copies of this work, and to ;;
12
+ ;;; permit persons to whom this work is furnished to do so, subject to ;;
13
+ ;;; the following conditions: ;;
14
+ ;;; 1. The code must retain the above copyright notice, this list of ;;
15
+ ;;; conditions and the following disclaimer. ;;
16
+ ;;; 2. Any modifications must be clearly marked as such. ;;
17
+ ;;; 3. Original authors' names are not deleted. ;;
18
+ ;;; 4. The authors' names are not used to endorse or promote products ;;
19
+ ;;; derived from this software without specific prior written ;;
20
+ ;;; permission. ;;
21
+ ;;; ;;
22
+ ;;; THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK ;;
23
+ ;;; DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ;;
24
+ ;;; ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT ;;
25
+ ;;; SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE ;;
26
+ ;;; FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES ;;
27
+ ;;; WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN ;;
28
+ ;;; AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ;;
29
+ ;;; ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF ;;
30
+ ;;; THIS SOFTWARE. ;;
31
+ ;;; ;;
32
+ ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
33
+ ;;;
34
+ ;;; A tree to predict zscore durations built from gsw 450 (timit)
35
+ ;;; doesn't use actual phonemes so it can have better generalizations
36
+ ;;;
37
+
38
+ ;; pre Sue's changes to mrpa_phones (on training data)
39
+ ;; RMSE 0.79102 Correlation is 0.610184 Mean (abs) Error 0.605081 (0.509517)
40
+ ;; Post with balance
41
+ ;; train test split --stop 19 --balance 16
42
+ ;; RMSE 0.841861 Correlation is 0.526064 Mean (abs) Error 0.646614 (0.539288)
43
+ ;; on training data
44
+ ;; RMSE 0.784032 Correlation is 0.619165 Mean (abs) Error 0.602819 (0.501332)
45
+ ;;
46
+ ;; Oct 29th 1997
47
+ ;; stepwise (but it's overtrained)
48
+ ;; RMSE 0.8322 Correlation is 0.5286 Mean (abs) Error 0.6375 (0.5350)
49
+ ;;
50
+ ;; May 11th 1998
51
+ ;; new architecture, full new train on f2b on test data
52
+ ;; in zscore domain
53
+ ;; RMSE 0.8076 Correlation is 0.5307 Mean (abs) Error 0.6113 (0.5278)
54
+ ;; in absolute domain
55
+ ;; RMSE 0.0276 Correlation 0.7468 Mean (abs) error 0.0203 (0.0187)
56
+ ;;
57
+ ;; May 18th 1998
58
+ ;; various corrections f2bdur.bbz.H0.S50.tree no names zscore
59
+ ;; in zscore domain
60
+ ;; RMSE 0.8049 Correlation is 0.6003 Mean (abs) Error 0.6008 (0.5357)
61
+ ;; in absolute domain
62
+ ;; RMSE 0.0268 Correlation 0.7766 Mean (abs) error 0.0196 (0.0183)
63
+
64
+ (set! gsw_duration_cart_tree
65
+ '
66
+ ((name is #)
67
+ ((emph_sil is +)
68
+ ((0.0 -0.5))
69
+ ((p.R:SylStructure.parent.parent.pbreak is BB)
70
+ ((0.0 2.0))
71
+ ((0.0 0.0))))
72
+
73
+ ((R:SylStructure.parent.accented is 0)
74
+ ((n.ph_ctype is 0)
75
+ ((p.ph_vlng is 0)
76
+ ((R:SylStructure.parent.syl_codasize < 1.5)
77
+ ((p.ph_ctype is n)
78
+ ((ph_ctype is f)
79
+ ((0.559208 -0.783163))
80
+ ((1.05215 -0.222704)))
81
+ ((ph_ctype is s)
82
+ ((R:SylStructure.parent.syl_break is 2)
83
+ ((0.589948 0.764459))
84
+ ((R:SylStructure.parent.asyl_in < 0.7)
85
+ ((1.06385 0.567944))
86
+ ((0.691943 0.0530272))))
87
+ ((ph_vlng is l)
88
+ ((pp.ph_vfront is 1)
89
+ ((1.06991 0.766486))
90
+ ((R:SylStructure.parent.syl_break is 1)
91
+ ((0.69665 0.279248))
92
+ ((0.670353 0.0567774))))
93
+ ((p.ph_ctype is s)
94
+ ((seg_onsetcoda is coda)
95
+ ((0.828638 -0.038356))
96
+ ((ph_ctype is f)
97
+ ((0.7631 -0.545853))
98
+ ((0.49329 -0.765994))))
99
+ ((R:SylStructure.parent.parent.gpos is det)
100
+ ((R:SylStructure.parent.last_accent < 0.3)
101
+ ((R:SylStructure.parent.sub_phrases < 1)
102
+ ((0.811686 0.160195))
103
+ ((0.799015 0.713958)))
104
+ ((0.731599 -0.215472)))
105
+ ((ph_ctype is r)
106
+ ((0.673487 0.092772))
107
+ ((R:SylStructure.parent.asyl_in < 1)
108
+ ((0.745273 0.00132813))
109
+ ((0.75457 -0.334898)))))))))
110
+ ((pos_in_syl < 0.5)
111
+ ((R:SylStructure.parent.R:Syllable.p.syl_break is 2)
112
+ ((R:SylStructure.parent.R:Syllable.n.syl_onsetsize < 0.2)
113
+ ((0.902446 -0.041618))
114
+ ((R:SylStructure.parent.sub_phrases < 2.3)
115
+ ((0.900629 0.262952))
116
+ ((1.18474 0.594794))))
117
+ ((seg_onset_stop is 0)
118
+ ((R:SylStructure.parent.position_type is mid)
119
+ ((0.512323 -0.760444))
120
+ ((R:SylStructure.parent.syl_out < 6.8)
121
+ ((pp.ph_vlng is a)
122
+ ((0.640575 -0.450449))
123
+ ((ph_ctype is f)
124
+ ((R:SylStructure.parent.sub_phrases < 1.3)
125
+ ((0.862876 -0.296956))
126
+ ((R:SylStructure.parent.syl_out < 2.4)
127
+ ((0.803215 0.0422868))
128
+ ((0.877856 -0.154465))))
129
+ ((R:SylStructure.parent.syl_out < 3.6)
130
+ ((R:SylStructure.parent.syl_out < 1.2)
131
+ ((0.567081 -0.264199))
132
+ ((0.598043 -0.541738)))
133
+ ((0.676843 -0.166623)))))
134
+ ((0.691678 -0.57173))))
135
+ ((R:SylStructure.parent.parent.gpos is cc)
136
+ ((1.15995 0.313289))
137
+ ((pp.ph_vfront is 1)
138
+ ((0.555993 0.0695819))
139
+ ((R:SylStructure.parent.asyl_in < 1.2)
140
+ ((R:SylStructure.parent.sub_phrases < 2.7)
141
+ ((0.721635 -0.367088))
142
+ ((0.71919 -0.194887)))
143
+ ((0.547052 -0.0637491)))))))
144
+ ((ph_ctype is s)
145
+ ((R:SylStructure.parent.syl_break is 0)
146
+ ((R:SylStructure.parent.R:Syllable.p.syl_break is 1)
147
+ ((0.650007 -0.333421))
148
+ ((0.846301 -0.165383)))
149
+ ((0.527756 -0.516332)))
150
+ ((R:SylStructure.parent.syl_break is 0)
151
+ ((p.ph_ctype is s)
152
+ ((0.504414 -0.779112))
153
+ ((0.812498 -0.337611)))
154
+ ((pos_in_syl < 1.4)
155
+ ((0.513041 -0.745807))
156
+ ((p.ph_ctype is s)
157
+ ((0.350582 -1.04907))
158
+ ((0.362 -0.914974))))))))
159
+ ((R:SylStructure.parent.syl_break is 0)
160
+ ((ph_ctype is n)
161
+ ((R:SylStructure.parent.position_type is initial)
162
+ ((pos_in_syl < 1.2)
163
+ ((0.580485 0.172658))
164
+ ((0.630973 -0.101423)))
165
+ ((0.577937 -0.360092)))
166
+ ((R:SylStructure.parent.syl_out < 2.9)
167
+ ((R:SylStructure.parent.syl_out < 1.1)
168
+ ((R:SylStructure.parent.position_type is initial)
169
+ ((0.896092 0.764189))
170
+ ((R:SylStructure.parent.sub_phrases < 3.6)
171
+ ((ph_ctype is s)
172
+ ((0.877362 0.555132))
173
+ ((0.604511 0.369882)))
174
+ ((0.799982 0.666966))))
175
+ ((seg_onsetcoda is coda)
176
+ ((p.ph_vlng is a)
177
+ ((R:SylStructure.parent.last_accent < 0.4)
178
+ ((0.800736 0.240634))
179
+ ((0.720606 0.486176)))
180
+ ((1.18173 0.573811)))
181
+ ((0.607147 0.194468))))
182
+ ((ph_ctype is r)
183
+ ((0.88377 0.499383))
184
+ ((R:SylStructure.parent.last_accent < 0.5)
185
+ ((R:SylStructure.parent.position_type is initial)
186
+ ((R:SylStructure.parent.parent.word_numsyls < 2.4)
187
+ ((0.62798 0.0737318))
188
+ ((0.787334 0.331014)))
189
+ ((ph_ctype is s)
190
+ ((0.808368 0.0929299))
191
+ ((0.527948 -0.0443271))))
192
+ ((seg_coda_fric is 0)
193
+ ((p.ph_vlng is a)
194
+ ((0.679745 0.517681))
195
+ ((R:SylStructure.parent.sub_phrases < 1.1)
196
+ ((0.759979 0.128316))
197
+ ((0.775233 0.361383))))
198
+ ((R:SylStructure.parent.last_accent < 1.3)
199
+ ((0.696255 0.054136))
200
+ ((0.632425 0.246742))))))))
201
+ ((pos_in_syl < 0.3)
202
+ ((R:SylStructure.parent.R:Syllable.p.syl_break is 2)
203
+ ((0.847602 0.621547))
204
+ ((ph_ctype is s)
205
+ ((0.880645 0.501679))
206
+ ((R:SylStructure.parent.sub_phrases < 3.3)
207
+ ((R:SylStructure.parent.sub_phrases < 0.3)
208
+ ((0.901014 -0.042049))
209
+ ((0.657493 0.183226)))
210
+ ((0.680126 0.284799)))))
211
+ ((ph_ctype is s)
212
+ ((p.ph_vlng is s)
213
+ ((0.670033 -0.820934))
214
+ ((0.863306 -0.348735)))
215
+ ((ph_ctype is n)
216
+ ((R:SylStructure.parent.asyl_in < 1.2)
217
+ ((0.656966 -0.40092))
218
+ ((0.530966 -0.639366)))
219
+ ((seg_coda_fric is 0)
220
+ ((1.04153 0.364857))
221
+ ((pos_in_syl < 1.2)
222
+ ((R:SylStructure.parent.syl_out < 3.4)
223
+ ((0.81503 -0.00768613))
224
+ ((0.602665 -0.197753)))
225
+ ((0.601844 -0.394632)))))))))
226
+ ((n.ph_ctype is f)
227
+ ((pos_in_syl < 1.5)
228
+ ((R:SylStructure.parent.R:Syllable.p.syl_break is 2)
229
+ ((pos_in_syl < 0.1)
230
+ ((1.63863 0.938841))
231
+ ((R:SylStructure.parent.position_type is initial)
232
+ ((0.897722 -0.0796637))
233
+ ((nn.ph_vheight is 0)
234
+ ((0.781081 0.480026))
235
+ ((0.779711 0.127175)))))
236
+ ((ph_ctype is r)
237
+ ((p.ph_ctype is s)
238
+ ((0.581329 -0.708767))
239
+ ((0.564366 -0.236212)))
240
+ ((ph_vlng is a)
241
+ ((p.ph_ctype is r)
242
+ ((0.70992 -0.273389))
243
+ ((R:SylStructure.parent.parent.gpos is in)
244
+ ((0.764696 0.0581338))
245
+ ((nn.ph_vheight is 0)
246
+ ((0.977737 0.721904))
247
+ ((R:SylStructure.parent.sub_phrases < 2.2)
248
+ ((pp.ph_vfront is 0)
249
+ ((0.586708 0.0161206))
250
+ ((0.619949 0.227372)))
251
+ ((0.707285 0.445569))))))
252
+ ((ph_ctype is n)
253
+ ((R:SylStructure.parent.syl_break is 1)
254
+ ((nn.ph_vfront is 2)
255
+ ((0.430295 -0.120097))
256
+ ((0.741371 0.219042)))
257
+ ((0.587492 0.321245)))
258
+ ((p.ph_ctype is n)
259
+ ((0.871586 0.134075))
260
+ ((p.ph_ctype is r)
261
+ ((0.490751 -0.466418))
262
+ ((R:SylStructure.parent.syl_codasize < 1.3)
263
+ ((R:SylStructure.parent.sub_phrases < 2.2)
264
+ ((p.ph_ctype is s)
265
+ ((0.407452 -0.425925))
266
+ ((0.644771 -0.542809)))
267
+ ((0.688772 -0.201899)))
268
+ ((ph_vheight is 1)
269
+ ((nn.ph_vheight is 0)
270
+ ((0.692018 0.209018))
271
+ ((0.751345 -0.178136)))
272
+ ((R:SylStructure.parent.R:Syllable.n.syl_onsetsize < 0.3)
273
+ ((R:SylStructure.parent.asyl_in < 1.5)
274
+ ((0.599633 -0.235593))
275
+ ((0.60042 0.126118)))
276
+ ((p.ph_vlng is a)
277
+ ((0.7148 -0.174812))
278
+ ((R:SylStructure.parent.parent.gpos is content)
279
+ ((0.761296 -0.231509))
280
+ ((0.813081 -0.536405)))))))))))))
281
+ ((ph_ctype is n)
282
+ ((0.898844 0.163343))
283
+ ((p.ph_vlng is s)
284
+ ((seg_coda_fric is 0)
285
+ ((0.752921 -0.45528))
286
+ ((0.890079 -0.0998025)))
287
+ ((ph_ctype is f)
288
+ ((0.729376 -0.930547))
289
+ ((ph_ctype is s)
290
+ ((R:SylStructure.parent.R:Syllable.p.syl_break is 0)
291
+ ((0.745052 -0.634119))
292
+ ((0.521502 -0.760176)))
293
+ ((R:SylStructure.parent.syl_break is 1)
294
+ ((0.766575 -0.121355))
295
+ ((0.795616 -0.557509))))))))
296
+ ((p.ph_vlng is 0)
297
+ ((p.ph_ctype is r)
298
+ ((ph_vlng is 0)
299
+ ((0.733659 -0.402734))
300
+ ((R:SylStructure.parent.sub_phrases < 1.5)
301
+ ((ph_vlng is s)
302
+ ((0.326176 -0.988478))
303
+ ((n.ph_ctype is s)
304
+ ((0.276471 -0.802536))
305
+ ((0.438283 -0.900628))))
306
+ ((nn.ph_vheight is 0)
307
+ ((ph_vheight is 2)
308
+ ((0.521 -0.768992))
309
+ ((0.615436 -0.574918)))
310
+ ((ph_vheight is 1)
311
+ ((0.387376 -0.756359))
312
+ ((pos_in_syl < 0.3)
313
+ ((0.417235 -0.808937))
314
+ ((0.384043 -0.93315)))))))
315
+ ((ph_vlng is a)
316
+ ((ph_ctype is 0)
317
+ ((n.ph_ctype is s)
318
+ ((p.ph_ctype is f)
319
+ ((R:SylStructure.parent.R:Syllable.n.syl_onsetsize < 0.2)
320
+ ((0.415908 -0.428493))
321
+ ((pos_in_syl < 0.1)
322
+ ((0.790441 0.0211071))
323
+ ((0.452465 -0.254485))))
324
+ ((p.ph_ctype is s)
325
+ ((R:SylStructure.parent.R:Syllable.n.syl_onsetsize < 0.2)
326
+ ((0.582447 -0.389966))
327
+ ((0.757648 0.185781)))
328
+ ((R:SylStructure.parent.sub_phrases < 1.4)
329
+ ((0.628965 0.422551))
330
+ ((0.713613 0.145576)))))
331
+ ((seg_onset_stop is 0)
332
+ ((R:SylStructure.parent.R:Syllable.p.syl_break is 0)
333
+ ((pp.ph_vfront is 1)
334
+ ((0.412363 -0.62319))
335
+ ((R:SylStructure.parent.syl_out < 3.6)
336
+ ((0.729259 -0.317324))
337
+ ((0.441633 -0.591051))))
338
+ ((R:SylStructure.parent.syl_break is 1)
339
+ ((R:SylStructure.parent.sub_phrases < 2.7)
340
+ ((0.457728 -0.405607))
341
+ ((0.532411 -0.313148)))
342
+ ((R:SylStructure.parent.last_accent < 0.3)
343
+ ((1.14175 0.159416))
344
+ ((0.616396 -0.254651)))))
345
+ ((R:SylStructure.parent.position_type is initial)
346
+ ((0.264181 -0.799896))
347
+ ((0.439801 -0.551309)))))
348
+ ((R:SylStructure.parent.position_type is final)
349
+ ((0.552027 -0.707084))
350
+ ((0.585661 -0.901874))))
351
+ ((ph_ctype is s)
352
+ ((pos_in_syl < 1.2)
353
+ ((R:SylStructure.parent.R:Syllable.n.syl_onsetsize < 0.2)
354
+ ((pp.ph_vfront is 1)
355
+ ((0.607449 0.196466))
356
+ ((0.599662 0.00382414)))
357
+ ((0.64109 -0.12859)))
358
+ ((pp.ph_vfront is 1)
359
+ ((0.720484 -0.219339))
360
+ ((0.688707 -0.516734))))
361
+ ((ph_vlng is s)
362
+ ((n.ph_ctype is s)
363
+ ((R:SylStructure.parent.parent.gpos is content)
364
+ ((R:SylStructure.parent.position_type is single)
365
+ ((0.659206 0.159445))
366
+ ((R:SylStructure.parent.parent.word_numsyls < 3.5)
367
+ ((R:SylStructure.parent.sub_phrases < 2)
368
+ ((0.447186 -0.419103))
369
+ ((0.631822 -0.0928561)))
370
+ ((0.451623 -0.576116))))
371
+ ((ph_vheight is 3)
372
+ ((0.578626 -0.64583))
373
+ ((0.56636 -0.4665))))
374
+ ((R:SylStructure.parent.parent.gpos is in)
375
+ ((0.771516 -0.217292))
376
+ ((R:SylStructure.parent.R:Syllable.p.syl_break is 2)
377
+ ((0.688571 -0.304382))
378
+ ((R:SylStructure.parent.parent.gpos is content)
379
+ ((R:SylStructure.parent.R:Syllable.p.syl_break is 1)
380
+ ((n.ph_ctype is n)
381
+ ((0.556085 -0.572203))
382
+ ((0.820173 -0.240338)))
383
+ ((R:SylStructure.parent.parent.word_numsyls < 2.2)
384
+ ((0.595398 -0.588171))
385
+ ((0.524737 -0.95797))))
386
+ ((R:SylStructure.parent.sub_phrases < 3.9)
387
+ ((0.371492 -0.959427))
388
+ ((0.440479 -0.845747)))))))
389
+ ((R:SylStructure.parent.R:Syllable.p.syl_break is 0)
390
+ ((p.ph_ctype is f)
391
+ ((0.524088 -0.482247))
392
+ ((nn.ph_vheight is 1)
393
+ ((0.587666 -0.632362))
394
+ ((ph_vlng is l)
395
+ ((R:SylStructure.parent.position_type is final)
396
+ ((0.513286 -0.713117))
397
+ ((0.604613 -0.924308)))
398
+ ((R:SylStructure.parent.syl_codasize < 2.2)
399
+ ((0.577997 -0.891342))
400
+ ((0.659804 -1.15252))))))
401
+ ((pp.ph_vlng is s)
402
+ ((ph_ctype is f)
403
+ ((0.813383 -0.599624))
404
+ ((0.984027 -0.0771909)))
405
+ ((p.ph_ctype is f)
406
+ ((R:SylStructure.parent.parent.gpos is in)
407
+ ((R:SylStructure.parent.R:Syllable.p.syl_break is 1)
408
+ ((0.313572 -1.03242))
409
+ ((0.525854 -0.542799)))
410
+ ((R:SylStructure.parent.syl_out < 2.8)
411
+ ((0.613007 -0.423979))
412
+ ((0.570258 -0.766379))))
413
+ ((R:SylStructure.parent.syl_break is 1)
414
+ ((R:SylStructure.parent.parent.gpos is to)
415
+ ((0.364585 -0.792895))
416
+ ((ph_vlng is l)
417
+ ((0.69143 -0.276816))
418
+ ((0.65673 -0.523721))))
419
+ ((R:SylStructure.parent.syl_out < 3.6)
420
+ ((R:SylStructure.parent.position_type is initial)
421
+ ((0.682096 -0.488102))
422
+ ((0.406364 -0.731758)))
423
+ ((0.584694 -0.822229)))))))))))
424
+ ((n.ph_ctype is r)
425
+ ((R:SylStructure.parent.position_type is initial)
426
+ ((p.ph_vlng is a)
427
+ ((0.797058 1.02334))
428
+ ((ph_ctype is s)
429
+ ((1.0548 0.536277))
430
+ ((0.817253 0.138201))))
431
+ ((R:SylStructure.parent.sub_phrases < 1.1)
432
+ ((R:SylStructure.parent.syl_out < 3.3)
433
+ ((0.884574 -0.23471))
434
+ ((0.772063 -0.525292)))
435
+ ((nn.ph_vfront is 1)
436
+ ((1.25254 0.417485))
437
+ ((0.955557 -0.0781996)))))
438
+ ((pp.ph_vfront is 0)
439
+ ((ph_ctype is f)
440
+ ((n.ph_ctype is s)
441
+ ((R:SylStructure.parent.parent.gpos is content)
442
+ ((R:SylStructure.parent.R:Syllable.p.syl_break is 0)
443
+ ((0.583506 -0.56941))
444
+ ((0.525949 -0.289362)))
445
+ ((0.749316 -0.0921038)))
446
+ ((p.ph_vlng is s)
447
+ ((0.734234 0.139463))
448
+ ((0.680119 -0.0708717))))
449
+ ((ph_vlng is s)
450
+ ((ph_vheight is 1)
451
+ ((0.908712 -0.618971))
452
+ ((0.55344 -0.840495)))
453
+ ((R:SylStructure.parent.R:Syllable.n.syl_onsetsize < 1.2)
454
+ ((pos_in_syl < 1.2)
455
+ ((R:SylStructure.parent.R:Syllable.p.syl_break is 2)
456
+ ((0.838715 0.00913392))
457
+ ((R:SylStructure.parent.R:Syllable.p.syl_break is 1)
458
+ ((ph_vheight is 2)
459
+ ((0.555513 -0.512523))
460
+ ((R:SylStructure.parent.position_type is initial)
461
+ ((0.758711 0.121704))
462
+ ((0.737555 -0.25637))))
463
+ ((R:SylStructure.parent.syl_out < 3.1)
464
+ ((n.ph_ctype is s)
465
+ ((0.611756 -0.474522))
466
+ ((1.05437 -0.247206)))
467
+ ((R:SylStructure.parent.syl_codasize < 2.2)
468
+ ((R:SylStructure.parent.position_type is final)
469
+ ((0.567761 -0.597866))
470
+ ((0.785599 -0.407765)))
471
+ ((0.575598 -0.741256))))))
472
+ ((ph_ctype is s)
473
+ ((n.ph_ctype is s)
474
+ ((0.661069 -1.08426))
475
+ ((0.783184 -0.39789)))
476
+ ((R:SylStructure.parent.R:Syllable.p.syl_break is 1)
477
+ ((R:SylStructure.parent.sub_phrases < 2.6)
478
+ ((0.511323 -0.666011))
479
+ ((0.691878 -0.499492)))
480
+ ((ph_ctype is r)
481
+ ((0.482131 -0.253186))
482
+ ((0.852955 -0.372832))))))
483
+ ((0.854447 -0.0936489)))))
484
+ ((R:SylStructure.parent.position_type is final)
485
+ ((0.685939 -0.249982))
486
+ ((R:SylStructure.parent.syl_out < 3.2)
487
+ ((0.989843 0.18086))
488
+ ((0.686805 -0.0402908)))))))))
489
+ ((R:SylStructure.parent.syl_out < 2.4)
490
+ ((R:SylStructure.parent.syl_out < 0.2)
491
+ ((seg_onsetcoda is coda)
492
+ ((ph_ctype is s)
493
+ ((R:SylStructure.parent.syl_break is 4)
494
+ ((pp.ph_vlng is 0)
495
+ ((0.959737 1.63203))
496
+ ((1.20714 0.994933)))
497
+ ((n.ph_ctype is 0)
498
+ ((R:SylStructure.parent.syl_break is 2)
499
+ ((0.864809 0.214457))
500
+ ((0.874278 0.730381)))
501
+ ((pp.ph_vfront is 0)
502
+ ((seg_coda_fric is 0)
503
+ ((1.20844 -0.336221))
504
+ ((1.01357 0.468302)))
505
+ ((0.658106 -0.799121)))))
506
+ ((n.ph_ctype is f)
507
+ ((ph_ctype is f)
508
+ ((1.26332 0.0300613))
509
+ ((ph_vlng is d)
510
+ ((1.02719 1.1649))
511
+ ((ph_ctype is 0)
512
+ ((R:SylStructure.parent.asyl_in < 1.2)
513
+ ((1.14048 2.2668))
514
+ ((ph_vheight is 1)
515
+ ((1.15528 1.50375))
516
+ ((1.42406 2.07927))))
517
+ ((R:SylStructure.parent.sub_phrases < 1.1)
518
+ ((0.955892 1.10243))
519
+ ((R:SylStructure.parent.syl_break is 2)
520
+ ((1.32682 1.8432))
521
+ ((1.27582 1.59853)))))))
522
+ ((n.ph_ctype is 0)
523
+ ((ph_ctype is n)
524
+ ((R:SylStructure.parent.syl_break is 2)
525
+ ((1.45399 1.12927))
526
+ ((1.05543 0.442376)))
527
+ ((R:SylStructure.parent.syl_break is 4)
528
+ ((R:SylStructure.parent.position_type is final)
529
+ ((ph_ctype is f)
530
+ ((1.46434 1.76508))
531
+ ((0.978055 0.7486)))
532
+ ((1.2395 2.30826)))
533
+ ((ph_ctype is 0)
534
+ ((0.935325 1.69917))
535
+ ((nn.ph_vfront is 1)
536
+ ((1.20456 1.31128))
537
+ ((R:SylStructure.parent.R:Syllable.n.syl_onsetsize < 0.2)
538
+ ((nn.ph_vheight is 0)
539
+ ((1.16907 0.212421))
540
+ ((0.952091 0.653094)))
541
+ ((p.ph_ctype is 0)
542
+ ((1.05502 1.25802))
543
+ ((0.818731 0.777568))))))))
544
+ ((ph_ctype is f)
545
+ ((p.ph_ctype is 0)
546
+ ((1.03918 0.163941))
547
+ ((0.737545 -0.167063)))
548
+ ((R:SylStructure.parent.position_type is final)
549
+ ((n.ph_ctype is n)
550
+ ((R:SylStructure.parent.last_accent < 0.5)
551
+ ((R:SylStructure.parent.sub_phrases < 2.8)
552
+ ((0.826207 -0.000859005))
553
+ ((0.871119 0.273433)))
554
+ ((R:SylStructure.parent.parent.word_numsyls < 2.4)
555
+ ((1.17405 1.05694))
556
+ ((0.858394 0.244916))))
557
+ ((R:SylStructure.parent.syl_codasize < 2.2)
558
+ ((p.ph_ctype is 0)
559
+ ((1.14092 1.21187))
560
+ ((R:SylStructure.parent.syl_break is 2)
561
+ ((1.02653 0.59865))
562
+ ((0.94248 1.1634))))
563
+ ((seg_coda_fric is 0)
564
+ ((1.07441 0.292935))
565
+ ((1.15736 0.92574)))))
566
+ ((ph_vlng is s)
567
+ ((R:SylStructure.parent.syl_break is 2)
568
+ ((1.34638 1.23484))
569
+ ((0.951514 2.02008)))
570
+ ((ph_ctype is 0)
571
+ ((p.ph_ctype is r)
572
+ ((0.806106 0.697089))
573
+ ((R:SylStructure.parent.syl_break is 2)
574
+ ((1.10891 0.992197))
575
+ ((1.04657 1.51093))))
576
+ ((1.18165 0.520952)))))))))
577
+ ((p.ph_vlng is 0)
578
+ ((pos_in_syl < 0.7)
579
+ ((R:SylStructure.parent.position_type is final)
580
+ ((ph_ctype is r)
581
+ ((0.966357 0.185827))
582
+ ((ph_ctype is s)
583
+ ((0.647163 0.0332298))
584
+ ((0.692972 -0.534917))))
585
+ ((ph_ctype is s)
586
+ ((0.881521 0.575107))
587
+ ((p.ph_ctype is f)
588
+ ((0.8223 -0.111275))
589
+ ((R:SylStructure.parent.last_accent < 0.3)
590
+ ((0.969188 0.09447))
591
+ ((0.894438 0.381947))))))
592
+ ((p.ph_ctype is f)
593
+ ((0.479748 -0.490108))
594
+ ((0.813125 -0.201268))))
595
+ ((ph_ctype is s)
596
+ ((0.908566 1.20397))
597
+ ((R:SylStructure.parent.last_accent < 1.2)
598
+ ((0.88078 0.636568))
599
+ ((0.978087 1.07763))))))
600
+ ((pos_in_syl < 1.3)
601
+ ((R:SylStructure.parent.syl_break is 0)
602
+ ((pos_in_syl < 0.1)
603
+ ((R:SylStructure.parent.position_type is initial)
604
+ ((p.ph_ctype is n)
605
+ ((0.801651 -0.0163359))
606
+ ((ph_ctype is s)
607
+ ((n.ph_ctype is r)
608
+ ((0.893307 1.07253))
609
+ ((p.ph_vlng is 0)
610
+ ((0.92651 0.525806))
611
+ ((0.652444 0.952792))))
612
+ ((p.ph_vlng is 0)
613
+ ((seg_onsetcoda is coda)
614
+ ((0.820151 0.469117))
615
+ ((p.ph_ctype is f)
616
+ ((0.747972 -0.0716448))
617
+ ((ph_ctype is f)
618
+ ((0.770882 0.457137))
619
+ ((0.840905 0.102492)))))
620
+ ((R:SylStructure.parent.syl_out < 1.1)
621
+ ((0.667824 0.697337))
622
+ ((0.737967 0.375114))))))
623
+ ((ph_vheight is 1)
624
+ ((0.624353 0.410671))
625
+ ((R:SylStructure.parent.asyl_in < 0.8)
626
+ ((0.647905 -0.331055))
627
+ ((p.ph_ctype is s)
628
+ ((0.629039 -0.240616))
629
+ ((0.749277 -0.0191273))))))
630
+ ((ph_vheight is 3)
631
+ ((p.ph_ctype is s)
632
+ ((0.626922 0.556537))
633
+ ((0.789357 0.153892)))
634
+ ((seg_onsetcoda is coda)
635
+ ((n.ph_ctype is 0)
636
+ ((R:SylStructure.parent.parent.word_numsyls < 3.4)
637
+ ((0.744714 0.123242))
638
+ ((0.742039 0.295753)))
639
+ ((seg_coda_fric is 0)
640
+ ((R:SylStructure.parent.parent.word_numsyls < 2.4)
641
+ ((ph_vheight is 1)
642
+ ((0.549715 -0.341018))
643
+ ((0.573641 -0.00893114)))
644
+ ((nn.ph_vfront is 2)
645
+ ((0.67099 -0.744625))
646
+ ((0.664438 -0.302803))))
647
+ ((p.ph_vlng is 0)
648
+ ((0.630028 0.113815))
649
+ ((0.632794 -0.128733)))))
650
+ ((ph_ctype is r)
651
+ ((0.367169 -0.854509))
652
+ ((0.94334 -0.216179))))))
653
+ ((n.ph_ctype is f)
654
+ ((ph_vlng is 0)
655
+ ((1.3089 0.46195))
656
+ ((R:SylStructure.parent.syl_codasize < 1.3)
657
+ ((1.07673 0.657169))
658
+ ((pp.ph_vlng is 0)
659
+ ((0.972319 1.08222))
660
+ ((1.00038 1.46257)))))
661
+ ((p.ph_vlng is l)
662
+ ((1.03617 0.785204))
663
+ ((p.ph_vlng is a)
664
+ ((R:SylStructure.parent.position_type is final)
665
+ ((1.00681 0.321168))
666
+ ((0.928115 0.950834)))
667
+ ((ph_vlng is 0)
668
+ ((pos_in_syl < 0.1)
669
+ ((R:SylStructure.parent.position_type is final)
670
+ ((0.863682 -0.167374))
671
+ ((nn.ph_vheight is 0)
672
+ ((p.ph_ctype is f)
673
+ ((0.773591 -0.00374425))
674
+ ((R:SylStructure.parent.syl_out < 1.1)
675
+ ((0.951802 0.228448))
676
+ ((1.02282 0.504252))))
677
+ ((1.09721 0.736476))))
678
+ ((R:SylStructure.parent.position_type is final)
679
+ ((1.04302 0.0590974))
680
+ ((0.589208 -0.431535))))
681
+ ((n.ph_ctype is 0)
682
+ ((1.27879 1.00642))
683
+ ((ph_vlng is s)
684
+ ((R:SylStructure.parent.asyl_in < 1.4)
685
+ ((0.935787 0.481652))
686
+ ((0.9887 0.749861)))
687
+ ((R:SylStructure.parent.syl_out < 1.1)
688
+ ((R:SylStructure.parent.position_type is final)
689
+ ((0.921307 0.0696307))
690
+ ((0.83675 0.552212)))
691
+ ((0.810076 -0.0479225))))))))))
692
+ ((ph_ctype is s)
693
+ ((n.ph_ctype is s)
694
+ ((0.706959 -1.0609))
695
+ ((p.ph_ctype is n)
696
+ ((0.850614 -0.59933))
697
+ ((n.ph_ctype is r)
698
+ ((0.665947 0.00698725))
699
+ ((n.ph_ctype is 0)
700
+ ((R:SylStructure.parent.position_type is initial)
701
+ ((0.762889 -0.0649044))
702
+ ((0.723956 -0.248899)))
703
+ ((R:SylStructure.parent.sub_phrases < 1.4)
704
+ ((0.632957 -0.601987))
705
+ ((0.889114 -0.302401)))))))
706
+ ((ph_ctype is f)
707
+ ((R:SylStructure.parent.syl_codasize < 2.2)
708
+ ((R:SylStructure.parent.R:Syllable.n.syl_onsetsize < 0.2)
709
+ ((R:SylStructure.parent.syl_out < 1.1)
710
+ ((0.865267 0.164636))
711
+ ((0.581827 -0.0989051)))
712
+ ((nn.ph_vfront is 2)
713
+ ((0.684459 -0.316836))
714
+ ((0.778854 -0.0961191))))
715
+ ((R:SylStructure.parent.syl_out < 1.1)
716
+ ((p.ph_ctype is s)
717
+ ((0.837964 -0.429437))
718
+ ((0.875304 -0.0652743)))
719
+ ((0.611071 -0.635089))))
720
+ ((p.ph_ctype is r)
721
+ ((R:SylStructure.parent.syl_out < 1.1)
722
+ ((0.762012 0.0139361))
723
+ ((0.567983 -0.454845)))
724
+ ((R:SylStructure.parent.syl_codasize < 2.2)
725
+ ((ph_ctype is l)
726
+ ((1.18845 0.809091))
727
+ ((R:SylStructure.parent.position_type is initial)
728
+ ((ph_ctype is n)
729
+ ((0.773548 -0.277092))
730
+ ((1.01586 0.281001)))
731
+ ((p.ph_ctype is 0)
732
+ ((1.06831 0.699145))
733
+ ((0.924189 0.241873)))))
734
+ ((R:SylStructure.parent.syl_break is 0)
735
+ ((ph_ctype is n)
736
+ ((0.592321 -0.470784))
737
+ ((0.778688 -0.072112)))
738
+ ((n.ph_ctype is s)
739
+ ((1.08848 0.0733489))
740
+ ((1.25674 0.608371))))))))))
741
+ ((pos_in_syl < 0.7)
742
+ ((p.ph_vlng is 0)
743
+ ((R:SylStructure.parent.position_type is mid)
744
+ ((ph_ctype is 0)
745
+ ((ph_vheight is 2)
746
+ ((0.456225 -0.293282))
747
+ ((0.561529 -0.0816115)))
748
+ ((0.6537 -0.504024)))
749
+ ((ph_ctype is s)
750
+ ((R:SylStructure.parent.R:Syllable.p.syl_break is 2)
751
+ ((1.31586 0.98395))
752
+ ((R:SylStructure.parent.position_type is single)
753
+ ((0.816869 0.634789))
754
+ ((R:SylStructure.parent.syl_out < 4.4)
755
+ ((1.05578 0.479029))
756
+ ((R:SylStructure.parent.asyl_in < 0.4)
757
+ ((1.11813 0.143214))
758
+ ((0.87178 0.406834))))))
759
+ ((n.ph_ctype is n)
760
+ ((R:SylStructure.parent.last_accent < 0.6)
761
+ ((0.838154 -0.415599))
762
+ ((0.924024 0.110288)))
763
+ ((seg_onsetcoda is coda)
764
+ ((nn.ph_vfront is 2)
765
+ ((0.670096 0.0314187))
766
+ ((n.ph_ctype is f)
767
+ ((1.00363 0.693893))
768
+ ((R:SylStructure.parent.syl_out < 6)
769
+ ((0.772363 0.215675))
770
+ ((0.920313 0.574068)))))
771
+ ((R:SylStructure.parent.position_type is final)
772
+ ((0.673837 -0.458142))
773
+ ((R:SylStructure.parent.sub_phrases < 2.8)
774
+ ((R:SylStructure.parent.R:Syllable.p.syl_break is 2)
775
+ ((0.894817 0.304628))
776
+ ((ph_ctype is n)
777
+ ((0.787302 -0.23094))
778
+ ((R:SylStructure.parent.asyl_in < 1.2)
779
+ ((ph_ctype is f)
780
+ ((R:SylStructure.parent.last_accent < 0.5)
781
+ ((1.12278 0.326954))
782
+ ((0.802236 -0.100616)))
783
+ ((0.791255 -0.0919132)))
784
+ ((0.95233 0.219053)))))
785
+ ((R:SylStructure.parent.position_type is initial)
786
+ ((ph_ctype is f)
787
+ ((1.0616 0.216118))
788
+ ((0.703216 -0.00834086)))
789
+ ((ph_ctype is f)
790
+ ((1.22277 0.761763))
791
+ ((0.904811 0.332721))))))))))
792
+ ((ph_vheight is 0)
793
+ ((p.ph_vlng is s)
794
+ ((0.873379 0.217178))
795
+ ((n.ph_ctype is r)
796
+ ((0.723915 1.29451))
797
+ ((n.ph_ctype is 0)
798
+ ((R:SylStructure.parent.R:Syllable.p.syl_break is 1)
799
+ ((R:SylStructure.parent.sub_phrases < 4)
800
+ ((seg_coda_fric is 0)
801
+ ((p.ph_vlng is l)
802
+ ((0.849154 0.945261))
803
+ ((0.633261 0.687498)))
804
+ ((0.728546 0.403076)))
805
+ ((0.850962 1.00255)))
806
+ ((0.957999 1.09113)))
807
+ ((0.85771 0.209045)))))
808
+ ((ph_vheight is 2)
809
+ ((0.803401 -0.0544067))
810
+ ((0.681353 0.256045)))))
811
+ ((n.ph_ctype is f)
812
+ ((ph_ctype is s)
813
+ ((p.ph_vlng is 0)
814
+ ((0.479307 -0.9673))
815
+ ((0.700477 -0.351397)))
816
+ ((ph_ctype is f)
817
+ ((0.73467 -0.6233))
818
+ ((R:SylStructure.parent.syl_break is 0)
819
+ ((p.ph_ctype is s)
820
+ ((0.56282 0.266234))
821
+ ((p.ph_ctype is r)
822
+ ((0.446203 -0.302281))
823
+ ((R:SylStructure.parent.sub_phrases < 2.7)
824
+ ((ph_ctype is 0)
825
+ ((0.572016 -0.0102436))
826
+ ((0.497358 -0.274514)))
827
+ ((0.545477 0.0482177)))))
828
+ ((ph_vlng is s)
829
+ ((0.805269 0.888495))
830
+ ((ph_ctype is n)
831
+ ((0.869854 0.653018))
832
+ ((R:SylStructure.parent.sub_phrases < 2.2)
833
+ ((0.735031 0.0612886))
834
+ ((0.771859 0.346637))))))))
835
+ ((R:SylStructure.parent.syl_codasize < 1.4)
836
+ ((R:SylStructure.parent.R:Syllable.n.syl_onsetsize < 0.3)
837
+ ((R:SylStructure.parent.position_type is initial)
838
+ ((0.743458 0.0411808))
839
+ ((1.13068 0.613305)))
840
+ ((pos_in_syl < 1.2)
841
+ ((R:SylStructure.parent.R:Syllable.p.syl_break is 1)
842
+ ((1.11481 0.175467))
843
+ ((0.937893 -0.276407)))
844
+ ((0.74264 -0.550878))))
845
+ ((pos_in_syl < 3.4)
846
+ ((seg_onsetcoda is coda)
847
+ ((ph_ctype is r)
848
+ ((n.ph_ctype is s)
849
+ ((0.714319 -0.240328))
850
+ ((p.ph_ctype is 0)
851
+ ((0.976987 0.330352))
852
+ ((1.1781 -0.0816682))))
853
+ ((ph_ctype is l)
854
+ ((n.ph_ctype is 0)
855
+ ((1.39137 0.383533))
856
+ ((0.725585 -0.324515)))
857
+ ((ph_vheight is 3)
858
+ ((ph_vlng is d)
859
+ ((0.802626 -0.62487))
860
+ ((n.ph_ctype is r)
861
+ ((0.661091 -0.513869))
862
+ ((R:SylStructure.parent.position_type is initial)
863
+ ((R:SylStructure.parent.parent.word_numsyls < 2.4)
864
+ ((0.482285 0.207874))
865
+ ((0.401601 -0.0204711)))
866
+ ((0.733755 0.397372)))))
867
+ ((n.ph_ctype is r)
868
+ ((p.ph_ctype is 0)
869
+ ((pos_in_syl < 1.2)
870
+ ((0.666325 0.271734))
871
+ ((nn.ph_vheight is 0)
872
+ ((0.642401 -0.261466))
873
+ ((0.783684 -0.00956571))))
874
+ ((R:SylStructure.parent.R:Syllable.n.syl_onsetsize < 0.2)
875
+ ((0.692225 -0.381895))
876
+ ((0.741921 -0.0898767))))
877
+ ((nn.ph_vfront is 2)
878
+ ((ph_ctype is s)
879
+ ((0.697527 -1.12626))
880
+ ((n.ph_ctype is s)
881
+ ((ph_vlng is 0)
882
+ ((R:SylStructure.parent.sub_phrases < 2.4)
883
+ ((0.498719 -0.906926))
884
+ ((0.635342 -0.625651)))
885
+ ((0.45886 -0.385089)))
886
+ ((0.848596 -0.359702))))
887
+ ((p.ph_vlng is a)
888
+ ((p.ph_ctype is 0)
889
+ ((0.947278 0.216904))
890
+ ((0.637933 -0.394349)))
891
+ ((p.ph_ctype is r)
892
+ ((R:SylStructure.parent.syl_break is 0)
893
+ ((0.529903 -0.860573))
894
+ ((0.581378 -0.510488)))
895
+ ((ph_vlng is 0)
896
+ ((R:SylStructure.parent.R:Syllable.n.syl_onsetsize < 0.2)
897
+ ((seg_onset_stop is 0)
898
+ ((R:SylStructure.parent.syl_break is 0)
899
+ ((p.ph_vlng is d)
900
+ ((0.768363 0.0108428))
901
+ ((ph_ctype is s)
902
+ ((0.835756 -0.035054))
903
+ ((ph_ctype is f)
904
+ ((p.ph_vlng is s)
905
+ ((0.602016 -0.179727))
906
+ ((0.640126 -0.297341)))
907
+ ((0.674628 -0.542602)))))
908
+ ((ph_ctype is s)
909
+ ((0.662261 -0.60496))
910
+ ((0.662088 -0.432058))))
911
+ ((R:SylStructure.parent.syl_out < 4.4)
912
+ ((0.582448 -0.389079))
913
+ ((ph_ctype is s)
914
+ ((0.60413 -0.73564))
915
+ ((0.567153 -0.605444)))))
916
+ ((R:SylStructure.parent.R:Syllable.p.syl_break is 2)
917
+ ((0.761115 -0.827377))
918
+ ((ph_ctype is n)
919
+ ((0.855183 -0.275338))
920
+ ((R:SylStructure.parent.syl_break is 0)
921
+ ((0.788288 -0.802801))
922
+ ((R:SylStructure.parent.syl_codasize < 2.2)
923
+ ((0.686134 -0.371234))
924
+ ((0.840184 -0.772883)))))))
925
+ ((pos_in_syl < 1.2)
926
+ ((R:SylStructure.parent.syl_break is 0)
927
+ ((n.ph_ctype is n)
928
+ ((0.423592 -0.655006))
929
+ ((R:SylStructure.parent.syl_out < 4.4)
930
+ ((0.595269 -0.303751))
931
+ ((0.478433 -0.456882))))
932
+ ((0.688133 -0.133182)))
933
+ ((seg_onset_stop is 0)
934
+ ((1.27464 0.114442))
935
+ ((0.406837 -0.167545))))))))))))
936
+ ((ph_ctype is r)
937
+ ((0.462874 -0.87695))
938
+ ((R:SylStructure.parent.R:Syllable.n.syl_onsetsize < 0.2)
939
+ ((0.645442 -0.640572))
940
+ ((0.673717 -0.321322)))))
941
+ ((0.61008 -0.925472))))))))
942
+ ;; RMSE 0.8085 Correlation is 0.5899 Mean (abs) Error 0.6024 (0.5393)
943
+
944
+
945
+ ))
946
+
947
+ (provide 'gswdurtreeZ) ;; announce this file as the feature gswdurtreeZ (counterpart of a (require 'gswdurtreeZ) elsewhere)
CosyVoice-ttsfrd/resource/festival/holmes_phones.scm ADDED
@@ -0,0 +1,118 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2
+ ;; ;;
3
+ ;; Centre for Speech Technology Research ;;
4
+ ;; University of Edinburgh, UK ;;
5
+ ;; Copyright (c) 1996,1997 ;;
6
+ ;; All Rights Reserved. ;;
7
+ ;; ;;
8
+ ;; Permission is hereby granted, free of charge, to use and distribute ;;
9
+ ;; this software and its documentation without restriction, including ;;
10
+ ;; without limitation the rights to use, copy, modify, merge, publish, ;;
11
+ ;; distribute, sublicense, and/or sell copies of this work, and to ;;
12
+ ;; permit persons to whom this work is furnished to do so, subject to ;;
13
+ ;; the following conditions: ;;
14
+ ;; 1. The code must retain the above copyright notice, this list of ;;
15
+ ;; conditions and the following disclaimer. ;;
16
+ ;; 2. Any modifications must be clearly marked as such. ;;
17
+ ;; 3. Original authors' names are not deleted. ;;
18
+ ;; 4. The authors' names are not used to endorse or promote products ;;
19
+ ;; derived from this software without specific prior written ;;
20
+ ;; permission. ;;
21
+ ;; ;;
22
+ ;; THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK ;;
23
+ ;; DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ;;
24
+ ;; ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT ;;
25
+ ;; SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE ;;
26
+ ;; FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES ;;
27
+ ;; WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN ;;
28
+ ;; AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ;;
29
+ ;; ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF ;;
30
+ ;; THIS SOFTWARE. ;;
31
+ ;; ;;
32
+ ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
33
+ ;;
34
+ ;; A definition of the Holmes phone set used by the Donovan LPC
35
+ ;; diphone synthesizer. The rest of the synthesis process will
36
+ ;; typically use mrpa phones and map to these.
37
+ ;;
38
+ ;; Hmm not sure I've got the right mapping (as usual)
39
+
40
+ (defPhoneSet ;; defines the phone set named below: first the feature declarations, then one row per phone
41
+ holmes
42
+ ;;; Phone Features: each entry is (feature-name allowed-value ...); their order fixes the column order of the phone rows
43
+ (;; vowel or consonant
44
+ (vc + -)
45
+ ;; vowel length: short long diphthong schwa
46
+ (vlng s l d a 0)
47
+ ;; vowel height: high mid low
48
+ (vheight 1 2 3 - 0)
49
+ ;; vowel frontness: front mid back
50
+ (vfront 1 2 3 - 0)
51
+ ;; lip rounding
52
+ (vrnd + - 0)
53
+ ;; consonant type: stop fricative affricate nasal lateral approximant
54
+ (ctype s f a n l r 0)
55
+ ;; place of articulation: labial alveolar palatal labio-dental
56
+ ;; dental velar glottal
57
+ (cplace l a p b d v g 0)
58
+ ;; consonant voicing
59
+ (cvox + - 0)
60
+ )
61
+ ;; Phone set members: each row is (phone vc vlng vheight vfront vrnd ctype cplace cvox)
62
+ (
63
+ ;; Note these features were set by awb so they are wrong !!!
64
+ (ee + l 1 1 - 0 0 0) ;; beet
65
+ (i + s 1 1 - 0 0 0) ;; bit
66
+ (ai + d 2 1 - 0 0 0) ;; gate
67
+ (e + s 2 1 - 0 0 0) ;; get
68
+ (aa + s 3 1 - 0 0 0) ;; fat
69
+ (ar + l 3 3 - 0 0 0) ;; father
70
+ (aw + l 3 3 + 0 0 0) ;; lawn
71
+ (oa + d 2 2 - 0 0 0) ;; lone
72
+ (oo + s 1 3 + 0 0 0) ;; full
73
+ (uu + l 1 3 + 0 0 0) ;; fool
74
+ (o + s 2 3 + 0 0 0)
75
+ (er + l 2 2 - 0 0 0) ;; murder
76
+ (a + a 2 2 - 0 0 0) ;; about
77
+ (u + s 2 3 - 0 0 0) ;; but
78
+ (ie + d 3 2 - 0 0 0) ;; hide
79
+ (ou + d 3 2 + 0 0 0) ;; how
80
+ (oi + d 3 3 + 0 0 0) ;; toy
81
+ (eer + d 2 1 - 0 0 0)
82
+ (air + d 1 1 - 0 0 0)
83
+ (oor + d 3 1 + 0 0 0)
84
+ ;; (yu + l 2 3 + 0 0 +) ;; you ???
85
+ ;; Consonant rows: the four vowel columns are 0 and the last three columns carry ctype, cplace and cvox
86
+ (p - 0 0 0 0 s l -)
87
+ (b - 0 0 0 0 s l +)
88
+ (t - 0 0 0 0 s a -)
89
+ (d - 0 0 0 0 s a +)
90
+ (k - 0 0 0 0 s v -)
91
+ (g - 0 0 0 0 s v +)
92
+ (f - 0 0 0 0 f b -)
93
+ (v - 0 0 0 0 f b +)
94
+ (th - 0 0 0 0 f d -)
95
+ (dh - 0 0 0 0 f d +)
96
+ (s - 0 0 0 0 f a -)
97
+ (z - 0 0 0 0 f a +)
98
+ (sh - 0 0 0 0 f p -)
99
+ (zh - 0 0 0 0 f p +)
100
+ (h - 0 0 0 0 f g -)
101
+ (m - 0 0 0 0 n l +)
102
+ (n - 0 0 0 0 n a +)
103
+ (ng - 0 0 0 0 n v +)
104
+ (ch - 0 0 0 0 a p -)
105
+ (j - 0 0 0 0 a p +)
106
+ (l - 0 0 0 0 l a +)
107
+ (w - 0 0 0 0 r l +)
108
+ (y - 0 0 0 0 r p +)
109
+ (r - 0 0 0 0 r a +)
110
+ ;; (wh - 0 - - + l l -) ;; ??
111
+ ;; (wh - 0 - - + l l +) ;; map to w
112
+ (# - 0 0 0 0 0 0 -) ;; the phone registered as silence by the PhoneSet.silences call after this definition
113
+ )
114
+ )
115
+
116
+ (PhoneSet.silences '(#)) ;; register # as the only silence phone; presumably applies to the phone set defined just above - confirm
117
+
118
+ (provide 'holmes_phones) ;; announce this file as the feature holmes_phones
CosyVoice-ttsfrd/resource/festival/hts.scm ADDED
@@ -0,0 +1,522 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ;; ---------------------------------------------------------------- ;;
2
+ ;; Nagoya Institute of Technology and ;;
3
+ ;; Carnegie Mellon University ;;
4
+ ;; Copyright (c) 2002 ;;
5
+ ;; All Rights Reserved. ;;
6
+ ;; ;;
7
+ ;; Permission is hereby granted, free of charge, to use and ;;
8
+ ;; distribute this software and its documentation without ;;
9
+ ;; restriction, including without limitation the rights to use, ;;
10
+ ;; copy, modify, merge, publish, distribute, sublicense, and/or ;;
11
+ ;; sell copies of this work, and to permit persons to whom this ;;
12
+ ;; work is furnished to do so, subject to the following conditions: ;;
13
+ ;; ;;
14
+ ;; 1. The code must retain the above copyright notice, this list ;;
15
+ ;; of conditions and the following disclaimer. ;;
16
+ ;; ;;
17
+ ;; 2. Any modifications must be clearly marked as such. ;;
18
+ ;; ;;
19
+ ;; 3. Original authors' names are not deleted. ;;
20
+ ;; ;;
21
+ ;; 4. The authors' names are not used to endorse or promote ;;
22
+ ;; products derived from this software without specific prior ;;
23
+ ;; written permission. ;;
24
+ ;; ;;
25
+ ;; NAGOYA INSTITUTE OF TECHNOLOGY, CARNEGIE MELLON UNIVERSITY AND ;;
26
+ ;; THE CONTRIBUTORS TO THIS WORK DISCLAIM ALL WARRANTIES WITH ;;
27
+ ;; REGARD TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF ;;
28
+ ;; MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL NAGOYA INSTITUTE ;;
29
+ ;; OF TECHNOLOGY, CARNEGIE MELLON UNIVERSITY NOR THE CONTRIBUTORS ;;
30
+ ;; BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ;;
31
+ ;; ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR ;;
32
+ ;; PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER ;;
33
+ ;; TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR ;;
34
+ ;; PERFORMANCE OF THIS SOFTWARE. ;;
35
+ ;; ;;
36
+ ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
37
+ ;; Generic HTS support code and specific features ;;
38
+ ;; http://hts.ics.nitech.ac.jp ;;
39
+ ;; Author : Alan W Black <[email protected]> ;;
40
+ ;; Date : August 2002 (and April 2004) ;;
41
+ ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
42
+ ;; ;;
43
+ ;; Still has language specific features in here, that will have to ;;
44
+ ;; move out to the voices ;;
45
+ ;; ;;
46
+ ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
47
+
48
+ (defvar hts_synth_pre_hooks nil)
49
+ (defvar hts_synth_post_hooks nil)
50
+ (defvar hts_engine_params nil)
51
+
52
+ (defvar hts_duration_stretch 0)
53
+ (defvar hts_f0_mean 0)
54
+ (defvar hts_f0_std 1)
55
+ (defvar hts_fw_factor 0.42)
56
+ (defvar hts_total_length 0.0)
57
+ (defvar hts_uv_threshold 0.5)
58
+ (defvar hts_use_phone_align 0)
59
+
60
(defSynthType HTS
  ;; HTS synthesis method for an utterance (bound to `utt' here).
  ;; Five temporary filenames are reserved, the pre-synthesis hooks run,
  ;; the global hts_output_params is pointed at those files, the context
  ;; labels are dumped, HTS_Synthesize is called, the temporary files are
  ;; removed and finally the post-synthesis hooks run.  Returns utt.
  (let ((featfile (make_tmp_filename))
        (mcepfile (make_tmp_filename))
        (f0file (make_tmp_filename))
        (wavfile (make_tmp_filename))
        (labfile (make_tmp_filename))
        (scratch nil))

    (apply_hooks hts_synth_pre_hooks utt)

    ;; Option/filename pairs published through a global variable
    (set! hts_output_params
          (list
           (list "-labelfile" featfile)
           (list "-om" mcepfile)
           (list "-of" f0file)
           (list "-or" wavfile)
           (list "-od" labfile)))

    (hts_dump_feats utt hts_feats_list featfile)
    (HTS_Synthesize utt)

    ;; Remove the temporary files in the same order they were created
    (set! scratch (list featfile mcepfile f0file wavfile labfile))
    (while scratch
      (delete-file (car scratch))
      (set! scratch (cdr scratch)))

    (apply_hooks hts_synth_post_hooks utt)
    utt))
91
+
92
+ (define (hts_feats_output ofd s)
93
+ "This is bad as it makes decisions about what the feats are"
94
+ ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
95
+ ;;; SEGMENT
96
+
97
+ ; boundary
98
+ (format ofd "%10.0f %10.0f "
99
+ (* 10000000 (item.feat s "segment_start"))
100
+ (* 10000000 (item.feat s "segment_end")))
101
+
102
+ ; pp.name
103
+ (format ofd "%s" (if (string-equal "0" (item.feat s "p.p.name"))
104
+ "x" (item.feat s "p.p.name")))
105
+ ; p.name
106
+ (format ofd "^%s" (if (string-equal "0" (item.feat s "p.name"))
107
+ "x" (item.feat s "p.name")))
108
+ ; c.name
109
+ (format ofd "-%s" (if (string-equal "0" (item.feat s "name"))
110
+ "x" (item.feat s "name")))
111
+ ; n.name
112
+ (format ofd "+%s" (if (string-equal "0" (item.feat s "n.name"))
113
+ "x" (item.feat s "n.name")))
114
+ ; nn.name
115
+ (format ofd "=%s" (if (string-equal "0" (item.feat s "n.n.name"))
116
+ "x" (item.feat s "n.n.name")))
117
+
118
+ ; position in syllable (segment)
119
+ (format ofd "@")
120
+ (format ofd "%s" (if (string-equal "pau" (item.feat s "name"))
121
+ "x" (+ 1 (item.feat s "pos_in_syl"))))
122
+ (format ofd "_%s" (if (string-equal "pau" (item.feat s "name"))
123
+ "x" (- (item.feat s "R:SylStructure.parent.R:Syllable.syl_numphones")
124
+ (item.feat s "pos_in_syl"))))
125
+
126
+ ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
127
+ ;;; SYLLABLE
128
+
129
+ ;; previous syllable
130
+
131
+ ; p.stress
132
+ (format ofd "/A:%s"
133
+ (if (string-equal "pau" (item.feat s "name"))
134
+ (item.feat s "p.R:SylStructure.parent.R:Syllable.stress")
135
+ (item.feat s "R:SylStructure.parent.R:Syllable.p.stress")))
136
+ ; p.accent
137
+ (format ofd "_%s"
138
+ (if (string-equal "pau" (item.feat s "name"))
139
+ (item.feat s "p.R:SylStructure.parent.R:Syllable.accented")
140
+ (item.feat s "R:SylStructure.parent.R:Syllable.p.accented")))
141
+ ; p.length
142
+ (format ofd "_%s"
143
+ (if (string-equal "pau" (item.feat s "name"))
144
+ (item.feat s "p.R:SylStructure.parent.R:Syllable.syl_numphones")
145
+ (item.feat s "R:SylStructure.parent.R:Syllable.p.syl_numphones")))
146
+ ;; current syllable
147
+
148
+ ; c.stress
149
+ (format ofd "/B:%s"
150
+ (if (string-equal "pau" (item.feat s "name"))
151
+ "x"
152
+ (item.feat s "R:SylStructure.parent.R:Syllable.stress")))
153
+ ; c.accent
154
+ (format ofd "-%s"
155
+ (if (string-equal "pau" (item.feat s "name"))
156
+ "x"
157
+ (item.feat s "R:SylStructure.parent.R:Syllable.accented")))
158
+ ; c.length
159
+ (format ofd "-%s"
160
+ (if (string-equal "pau" (item.feat s "name"))
161
+ "x"
162
+ (item.feat s "R:SylStructure.parent.R:Syllable.syl_numphones")))
163
+
164
+ ; position in word (syllable)
165
+ (format ofd "@%s"
166
+ (if (string-equal "pau" (item.feat s "name"))
167
+ "x"
168
+ (+ 1 (item.feat s "R:SylStructure.parent.R:Syllable.pos_in_word"))))
169
+ (format ofd "-%s"
170
+ (if (string-equal "pau" (item.feat s "name"))
171
+ "x"
172
+ (-
173
+ (item.feat s "R:SylStructure.parent.parent.R:Word.word_numsyls")
174
+ (item.feat s "R:SylStructure.parent.R:Syllable.pos_in_word"))))
175
+
176
+ ; position in phrase (syllable)
177
+ (format ofd "&%s"
178
+ (if (string-equal "pau" (item.feat s "name"))
179
+ "x"
180
+ (+ 1
181
+ (item.feat s "R:SylStructure.parent.R:Syllable.syl_in"))))
182
+ (format ofd "-%s"
183
+ (if (string-equal "pau" (item.feat s "name"))
184
+ "x"
185
+ (+ 1
186
+ (item.feat s "R:SylStructure.parent.R:Syllable.syl_out"))))
187
+
188
+ ; position in phrase (stressed syllable)
189
+ (format ofd "#%s"
190
+ (if (string-equal "pau" (item.feat s "name"))
191
+ "x"
192
+ (+ 1
193
+ (item.feat s "R:SylStructure.parent.R:Syllable.ssyl_in"))))
194
+ (format ofd "-%s"
195
+ (if (string-equal "pau" (item.feat s "name"))
196
+ "x"
197
+ (+ 1
198
+ (item.feat s "R:SylStructure.parent.R:Syllable.ssyl_out"))))
199
+
200
+ ; position in phrase (accented syllable)
201
+ (format ofd "$%s"
202
+ (if (string-equal "pau" (item.feat s "name"))
203
+ "x"
204
+ (+ 1
205
+ (item.feat s "R:SylStructure.parent.R:Syllable.asyl_in"))))
206
+ (format ofd "-%s"
207
+ (if (string-equal "pau" (item.feat s "name"))
208
+ "x"
209
+ (+ 1
210
+ (item.feat s "R:SylStructure.parent.R:Syllable.asyl_out"))))
211
+
212
+ ; distance from stressed syllable
213
+ (format ofd "!%s"
214
+ (if (string-equal "pau" (item.feat s "name"))
215
+ "x"
216
+ (item.feat s "R:SylStructure.parent.R:Syllable.lisp_distance_to_p_stress")))
217
+ (format ofd "-%s"
218
+ (if (string-equal "pau" (item.feat s "name"))
219
+ "x"
220
+ (item.feat s "R:SylStructure.parent.R:Syllable.lisp_distance_to_n_stress")))
221
+
222
+ ; distance from accented syllable
223
+ (format ofd ";%s"
224
+ (if (string-equal "pau" (item.feat s "name"))
225
+ "x"
226
+ (item.feat s "R:SylStructure.parent.R:Syllable.lisp_distance_to_p_accent")))
227
+ (format ofd "-%s"
228
+ (if (string-equal "pau" (item.feat s "name"))
229
+ "x"
230
+ (item.feat s "R:SylStructure.parent.R:Syllable.lisp_distance_to_n_accent")))
231
+
232
+ ; name of the vowel of current syllable
233
+ (format ofd "|%s"
234
+ (if (string-equal "pau" (item.feat s "name"))
235
+ "x"
236
+ (item.feat s "R:SylStructure.parent.R:Syllable.syl_vowel")))
237
+
238
+ ;; next syllable
239
+ (format ofd "/C:%s"
240
+ (if (string-equal "pau" (item.feat s "name"))
241
+ (item.feat s "n.R:SylStructure.parent.R:Syllable.stress")
242
+ (item.feat s "R:SylStructure.parent.R:Syllable.n.stress")))
243
+ ; n.accent
244
+ (format ofd "+%s"
245
+ (if (string-equal "pau" (item.feat s "name"))
246
+ (item.feat s "n.R:SylStructure.parent.R:Syllable.accented")
247
+ (item.feat s "R:SylStructure.parent.R:Syllable.n.accented")))
248
+ ; n.length
249
+ (format ofd "+%s"
250
+ (if (string-equal "pau" (item.feat s "name"))
251
+ (item.feat s "n.R:SylStructure.parent.R:Syllable.syl_numphones")
252
+ (item.feat s "R:SylStructure.parent.R:Syllable.n.syl_numphones")))
253
+
254
+ ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
255
+ ; WORD
256
+
257
+ ;;;;;;;;;;;;;;;;;;
258
+ ;; previous word
259
+
260
+ ; p.gpos
261
+ (format ofd "/D:%s"
262
+ (if (string-equal "pau" (item.feat s "name"))
263
+ (item.feat s "p.R:SylStructure.parent.parent.R:Word.gpos")
264
+ (item.feat s "R:SylStructure.parent.parent.R:Word.p.gpos")))
265
+ ; p.length (syllable)
266
+ (format ofd "_%s"
267
+ (if (string-equal "pau" (item.feat s "name"))
268
+ (item.feat s "p.R:SylStructure.parent.parent.R:Word.word_numsyls")
269
+ (item.feat s "R:SylStructure.parent.parent.R:Word.p.word_numsyls")))
270
+
271
+ ;;;;;;;;;;;;;;;;;
272
+ ;; current word
273
+
274
+ ; c.gpos
275
+ (format ofd "/E:%s"
276
+ (if (string-equal "pau" (item.feat s "name"))
277
+ "x"
278
+ (item.feat s "R:SylStructure.parent.parent.R:Word.gpos")))
279
+ ; c.length (syllable)
280
+ (format ofd "+%s"
281
+ (if (string-equal "pau" (item.feat s "name"))
282
+ "x"
283
+ (item.feat s "R:SylStructure.parent.parent.R:Word.word_numsyls")))
284
+
285
+ ; position in phrase (word)
286
+ (format ofd "@%s"
287
+ (if (string-equal "pau" (item.feat s "name"))
288
+ "x"
289
+ (+ 1 (item.feat s "R:SylStructure.parent.parent.R:Word.pos_in_phrase"))))
290
+ (format ofd "+%s"
291
+ (if (string-equal "pau" (item.feat s "name"))
292
+ "x"
293
+ (item.feat s "R:SylStructure.parent.parent.R:Word.words_out")))
294
+
295
+ ; position in phrase (content word)
296
+ (format ofd "&%s"
297
+ (if (string-equal "pau" (item.feat s "name"))
298
+ "x"
299
+ (item.feat s "R:SylStructure.parent.parent.R:Word.content_words_in")))
300
+ (format ofd "+%s"
301
+ (if (string-equal "pau" (item.feat s "name"))
302
+ "x"
303
+ (item.feat s "R:SylStructure.parent.parent.R:Word.content_words_out")))
304
+
305
+ ; distance from content word in phrase
306
+ (format ofd "#%s"
307
+ (if (string-equal "pau" (item.feat s "name"))
308
+ "x"
309
+ (item.feat s "R:SylStructure.parent.parent.R:Word.lisp_distance_to_p_content")))
310
+ (format ofd "+%s"
311
+ (if (string-equal "pau" (item.feat s "name"))
312
+ "x"
313
+ (item.feat s "R:SylStructure.parent.parent.R:Word.lisp_distance_to_n_content")))
314
+
315
+ ;;;;;;;;;;;;;;
316
+ ;; next word
317
+
318
+ ; n.gpos
319
+ (format ofd "/F:%s"
320
+ (if (string-equal "pau" (item.feat s "name"))
321
+ (item.feat s "n.R:SylStructure.parent.parent.R:Word.gpos")
322
+ (item.feat s "R:SylStructure.parent.parent.R:Word.n.gpos")))
323
+ ; n.length (syllable)
324
+ (format ofd "_%s"
325
+ (if (string-equal "pau" (item.feat s "name"))
326
+ (item.feat s "n.R:SylStructure.parent.parent.R:Word.word_numsyls")
327
+ (item.feat s "R:SylStructure.parent.parent.R:Word.n.word_numsyls")))
328
+
329
+ ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
330
+ ; PHRASE
331
+
332
+ ;;;;;;;;;;;;;;;;;;;;
333
+ ;; previous phrase
334
+
335
+ ; length of previous phrase (syllable)
336
+ (format ofd "/G:%s"
337
+ (if (string-equal "pau" (item.feat s "name"))
338
+ (item.feat s "p.R:SylStructure.parent.parent.R:Phrase.parent.lisp_num_syls_in_phrase")
339
+ (item.feat s "R:SylStructure.parent.parent.R:Phrase.parent.p.lisp_num_syls_in_phrase")))
340
+
341
+ ; length of previous phrase (word)
342
+ (format ofd "_%s"
343
+ (if (string-equal "pau" (item.feat s "name"))
344
+ (item.feat s "p.R:SylStructure.parent.parent.R:Phrase.parent.lisp_num_words_in_phrase")
345
+ (item.feat s "R:SylStructure.parent.parent.R:Phrase.parent.p.lisp_num_words_in_phrase")))
346
+
347
+ ;;;;;;;;;;;;;;;;;;;;
348
+ ;; current phrase
349
+
350
+ ; length of current phrase (syllable)
351
+ (format ofd "/H:%s"
352
+ (if (string-equal "pau" (item.feat s "name"))
353
+ "x"
354
+ (item.feat s "R:SylStructure.parent.parent.R:Phrase.parent.lisp_num_syls_in_phrase")))
355
+
356
+ ; length of current phrase (word)
357
+ (format ofd "=%s"
358
+ (if (string-equal "pau" (item.feat s "name"))
359
+ "x"
360
+ (item.feat s "R:SylStructure.parent.parent.R:Phrase.parent.lisp_num_words_in_phrase")))
361
+
362
+ ; position in major phrase (phrase)
363
+ (format ofd "@%s"
364
+ (+ 1 (item.feat s "R:SylStructure.parent.R:Syllable.sub_phrases")))
365
+ (format ofd "=%s"
366
+ (-
367
+ (item.feat s "lisp_total_phrases")
368
+ (item.feat s "R:SylStructure.parent.R:Syllable.sub_phrases")))
369
+
370
+ ; type of tobi endtone of current phrase
371
+ (format ofd "|%s"
372
+ (item.feat s "R:SylStructure.parent.parent.R:Phrase.parent.daughtern.R:SylStructure.daughtern.tobi_endtone"))
373
+
374
+ ;;;;;;;;;;;;;;;;;;;;
375
+ ;; next phrase
376
+
377
+ ; length of next phrase (syllable)
378
+ (format ofd "/I:%s"
379
+ (if (string-equal "pau" (item.feat s "name"))
380
+ (item.feat s "n.R:SylStructure.parent.parent.R:Phrase.parent.lisp_num_syls_in_phrase")
381
+ (item.feat s "R:SylStructure.parent.parent.R:Phrase.parent.n.lisp_num_syls_in_phrase")))
382
+
383
+ ; length of next phrase (word)
384
+ (format ofd "=%s"
385
+ (if (string-equal "pau" (item.feat s "name"))
386
+ (item.feat s "n.R:SylStructure.parent.parent.R:Phrase.parent.lisp_num_words_in_phrase")
387
+ (item.feat s "R:SylStructure.parent.parent.R:Phrase.parent.n.lisp_num_words_in_phrase")))
388
+
389
+ ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
390
+ ; UTTERANCE
391
+
392
+ ; length (syllable)
393
+ (format ofd "/J:%s" (item.feat s "lisp_total_syls"))
394
+
395
+ ; length (word)
396
+ (format ofd "+%s" (item.feat s "lisp_total_words"))
397
+
398
+ ; length (phrase)
399
+ (format ofd "-%s" (item.feat s "lisp_total_phrases"))
400
+
401
+ (format ofd "\n")
402
+
403
+ )
404
+
405
;; Write one label line per item of UTT's Segment relation to the file
;; OFILE, using hts_feats_output for each segment.  FEATS is accepted but
;; not consulted by this function.  Returns the value of fclose.
(define (hts_dump_feats utt feats ofile)
  (let ((ofd (fopen ofile "w")))
    (let ((segs (utt.relation.items utt 'Segment)))
      ;; Walk the segments front to back, emitting a line for each
      (while segs
        (hts_feats_output ofd (car segs))
        (set! segs (cdr segs))))
    (fclose ofd)))
413
+
414
+
415
+ ;;
416
+ ;; Extra features
417
+ ;; From Segment items refer by
418
+ ;;
419
+ ;; R:SylStructure.parent.parent.R:Phrase.parent.lisp_num_syls_in_phrase
420
+ ;; R:SylStructure.parent.parent.R:Phrase.parent.lisp_num_words_in_phrase
421
+ ;; lisp_total_words
422
+ ;; lisp_total_syls
423
+ ;; lisp_total_phrases
424
+ ;;
425
+ ;; The last three will act on any item
426
+
427
;; Shared scan behind the distance_to_* feature functions.
;; Begins at START and moves with STEP (item.prev or item.next) for as
;; long as the current item is non-nil and (CONTINUEP item) is true.
;; Every visited item increments a counter; the first visited item whose
;; FEATNAME feature equals "1" ends the scan and that count is returned.
;; Returns 0 if the scan ends without finding such an item.
(define (hts_scan_distance start step continuep featname)
  (let ((c 0) (rc 0) (w start))
    (while (and w (continuep w))
      (set! c (+ 1 c))
      (if (string-equal "1" (item.feat w featname))
          (begin
            (set! rc c)
            (set! w nil))        ;; found: force loop exit
          (set! w (step w))))
    rc))

;; Number of steps back along the Phrase relation from I to the nearest
;; preceding item with contentp "1"; 0 if there is none.
(define (distance_to_p_content i)
  (hts_scan_distance
   (item.relation.prev i "Phrase")
   item.prev
   (lambda (w) t)
   "contentp"))

;; Number of steps forward along the Phrase relation from I to the nearest
;; following item with contentp "1"; 0 if there is none.
(define (distance_to_n_content i)
  (hts_scan_distance
   (item.relation.next i "Phrase")
   item.next
   (lambda (w) t)
   "contentp"))

;; Steps back along the Syllable relation to the nearest accented
;; syllable.  The scan stops at a syllable whose syl_break is not
;; "0" or "1"; returns 0 if no accented syllable is reached.
(define (distance_to_p_accent i)
  (hts_scan_distance
   (item.relation.prev i "Syllable")
   item.prev
   (lambda (w) (member_string (item.feat w "syl_break") '("0" "1")))
   "accented"))

;; Steps forward along the Syllable relation to the nearest accented
;; syllable.  The scan stops at a syllable whose p.syl_break is not
;; "0" or "1"; returns 0 if no accented syllable is reached.
(define (distance_to_n_accent i)
  (hts_scan_distance
   (item.relation.next i "Syllable")
   item.next
   (lambda (w) (member_string (item.feat w "p.syl_break") '("0" "1")))
   "accented"))

;; As distance_to_p_accent, but looking for stress "1".
(define (distance_to_p_stress i)
  (hts_scan_distance
   (item.relation.prev i "Syllable")
   item.prev
   (lambda (w) (member_string (item.feat w "syl_break") '("0" "1")))
   "stress"))

;; As distance_to_n_accent, but looking for stress "1".
(define (distance_to_n_stress i)
  (hts_scan_distance
   (item.relation.next i "Syllable")
   item.next
   (lambda (w) (member_string (item.feat w "p.syl_break") '("0" "1")))
   "stress"))
498
+
499
;; Sum, over the daughters of I in the Phrase relation, of the number of
;; SylStructure daughters each of them has.
(define (num_syls_in_phrase i)
  (let ((words (item.relation.daughters i 'Phrase)))
    (apply
     +
     (mapcar
      (lambda (word)
        (length (item.relation.daughters word 'SylStructure)))
      words))))
506
+
507
;; Number of daughters item I has in the Phrase relation.
(define (num_words_in_phrase i)
  (let ((words (item.relation.daughters i 'Phrase)))
    (length words)))
509
+
510
;; Number of items in the Word relation of the utterance that owns W.
(define (total_words w)
  (let ((owner (item.get_utt w)))
    (length (utt.relation.items owner 'Word))))
513
+
514
;; Number of items in the Syllable relation of the utterance that owns S.
(define (total_syls s)
  (let ((owner (item.get_utt s)))
    (length (utt.relation.items owner 'Syllable))))
517
+
518
;; Length of the list utt.relation_tree returns for the Phrase relation
;; of the utterance that owns S.
(define (total_phrases s)
  (let ((owner (item.get_utt s)))
    (length (utt.relation_tree owner 'Phrase))))
521
+
522
+ (provide 'hts)
CosyVoice-ttsfrd/resource/festival/init.scm ADDED
@@ -0,0 +1,157 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2
+ ;;; ;;
3
+ ;;; Centre for Speech Technology Research ;;
4
+ ;;; University of Edinburgh, UK ;;
5
+ ;;; Copyright (c) 1996,1997 ;;
6
+ ;;; All Rights Reserved. ;;
7
+ ;;; ;;
8
+ ;;; Permission is hereby granted, free of charge, to use and distribute ;;
9
+ ;;; this software and its documentation without restriction, including ;;
10
+ ;;; without limitation the rights to use, copy, modify, merge, publish, ;;
11
+ ;;; distribute, sublicense, and/or sell copies of this work, and to ;;
12
+ ;;; permit persons to whom this work is furnished to do so, subject to ;;
13
+ ;;; the following conditions: ;;
14
+ ;;; 1. The code must retain the above copyright notice, this list of ;;
15
+ ;;; conditions and the following disclaimer. ;;
16
+ ;;; 2. Any modifications must be clearly marked as such. ;;
17
+ ;;; 3. Original authors' names are not deleted. ;;
18
+ ;;; 4. The authors' names are not used to endorse or promote products ;;
19
+ ;;; derived from this software without specific prior written ;;
20
+ ;;; permission. ;;
21
+ ;;; ;;
22
+ ;;; THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK ;;
23
+ ;;; DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ;;
24
+ ;;; ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT ;;
25
+ ;;; SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE ;;
26
+ ;;; FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES ;;
27
+ ;;; WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN ;;
28
+ ;;; AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ;;
29
+ ;;; ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF ;;
30
+ ;;; THIS SOFTWARE. ;;
31
+ ;;; ;;
32
+ ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
33
+ ;;;
34
+ ;;; Initialisation file -- loaded before anything else
35
+ ;;;
36
+ ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
37
+
38
+ ;;; Basic siod library (need this before load_library or require works)
39
+ (load (path-append libdir "siod.scm"))
40
+
41
+ (defvar home-directory (or (getenv "HOME") "/")
42
+ "home-directory
43
+ Place looked at for .festivalrc etc.")
44
+
45
+ ;;; User startup initialization, can be used to override load-path
46
+ ;;; to allow alternate basic modules to be loaded.
47
+ (if (probe_file (path-append home-directory ".siodvarsrc"))
48
+ (load (path-append home-directory ".siodvarsrc")))
49
+
50
+ (if (probe_file (path-append home-directory ".festivalvarsrc"))
51
+ (load (path-append home-directory ".festivalvarsrc")))
52
+
53
+ ;;; A chance to set various variables to a local setting e.g.
54
+ ;;; lexdir, voices_dir audio etc etc.
55
+ (if (probe_file (path-append libdir "sitevars.scm"))
56
+ (load (path-append libdir "sitevars.scm")))
57
+
58
+ ;;; CSTR siod extensions
59
+ (require 'cstr)
60
+
61
+ ;;; Festival specific definitions
62
+ (require 'festival)
63
+
64
+ ;;; Dealing with module descriptions
65
+ (require 'module_description)
66
+
67
+ ;;; Web related definitions
68
+ (require 'web)
69
+
70
+ ;;; Utterance types and support
71
+ (require 'synthesis)
72
+
73
+ ;;; Some default parameters
74
+ (Parameter.def 'Wavefiletype 'riff)
75
+
76
+ ;;; Set default audio method
77
+ (cond
78
+ ((member 'nas *modules*)
79
+ (Parameter.def 'Audio_Method 'netaudio))
80
+ ((member 'esd *modules*)
81
+ (Parameter.def 'Audio_Method 'esdaudio))
82
+ ((member 'sun16audio *modules*)
83
+ (Parameter.def 'Audio_Method 'sun16audio))
84
+ ((member 'freebsd16audio *modules*)
85
+ (Parameter.def 'Audio_Method 'freebsd16audio))
86
+ ((member 'linux16audio *modules*)
87
+ (Parameter.def 'Audio_Method 'linux16audio))
88
+ ((member 'irixaudio *modules*)
89
+ (Parameter.def 'Audio_Method 'irixaudio))
90
+ ((member 'macosxaudio *modules*)
91
+ (Parameter.def 'Audio_Method 'macosxaudio))
92
+ ((member 'win32audio *modules*)
93
+ (Parameter.def 'Audio_Method 'win32audio))
94
+ ((member 'os2audio *modules*)
95
+ (Parameter.def 'Audio_Method 'os2audio))
96
+ ((member 'mplayeraudio *modules*)
97
+ (Parameter.def 'Audio_Method 'mplayeraudio))
98
+ (t ;; can't find direct support so guess that /dev/audio for 8k ulaw exists
99
+ (Parameter.def 'Audio_Method 'sunaudio)))
100
+ ;;; If you have an external program to play audio add its definition
101
+ ;;; in siteinit.scm
102
+
103
+ ;;; The audio spooler doesn't work under Windows so redefine audio_mode
104
+ (if (member 'mplayeraudio *modules*)
105
+ (define (audio_mode param) param)
106
+ )
107
+
108
+ ;;; Intonation
109
+ (require 'intonation)
110
+
111
+ ;;; Duration
112
+ (require 'duration)
113
+
114
+ ;;; A large lexicon
115
+ (require 'lexicons)
116
+ (require 'pauses)
117
+
118
+ ;;; Part of speech prediction
119
+ (require 'pos)
120
+
121
+ ;;; Phrasing (dependent on pos)
122
+ (require 'phrase)
123
+
124
+ ;;; Postlexical rules
125
+ (require 'postlex)
126
+
127
+ ;;; Different voices
128
+ (require 'voices) ;; sets voice_default
129
+ (require 'languages)
130
+
131
+ ;;; Some higher level functions
132
+ (require 'token)
133
+ (require 'tts)
134
+
135
+ ;;;
136
+ ;;; Local site initialization, if the file exists load it
137
+ ;;;
138
+ (if (probe_file (path-append libdir "siteinit.scm"))
139
+ (load (path-append libdir "siteinit.scm")))
140
+
141
+ ;;; User initialization, if a user has a personal customization
142
+ ;;; file, load it
143
+ (if (probe_file (path-append home-directory ".siodrc"))
144
+ (load (path-append home-directory ".siodrc")))
145
+
146
+ (if (probe_file (path-append home-directory ".festivalrc"))
147
+ (load (path-append home-directory ".festivalrc")))
148
+
149
+ ;;; Default voice (have to do something cute so autoloads still work)
150
+ (eval (list voice_default))
151
+
152
+ (provide 'init)
153
+
154
+
155
+
156
+
157
+
CosyVoice-ttsfrd/resource/festival/intonation.scm ADDED
@@ -0,0 +1,187 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2
+ ;;; ;;
3
+ ;;; Centre for Speech Technology Research ;;
4
+ ;;; University of Edinburgh, UK ;;
5
+ ;;; Copyright (c) 1996,1997 ;;
6
+ ;;; All Rights Reserved. ;;
7
+ ;;; ;;
8
+ ;;; Permission is hereby granted, free of charge, to use and distribute ;;
9
+ ;;; this software and its documentation without restriction, including ;;
10
+ ;;; without limitation the rights to use, copy, modify, merge, publish, ;;
11
+ ;;; distribute, sublicense, and/or sell copies of this work, and to ;;
12
+ ;;; permit persons to whom this work is furnished to do so, subject to ;;
13
+ ;;; the following conditions: ;;
14
+ ;;; 1. The code must retain the above copyright notice, this list of ;;
15
+ ;;; conditions and the following disclaimer. ;;
16
+ ;;; 2. Any modifications must be clearly marked as such. ;;
17
+ ;;; 3. Original authors' names are not deleted. ;;
18
+ ;;; 4. The authors' names are not used to endorse or promote products ;;
19
+ ;;; derived from this software without specific prior written ;;
20
+ ;;; permission. ;;
21
+ ;;; ;;
22
+ ;;; THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK ;;
23
+ ;;; DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ;;
24
+ ;;; ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT ;;
25
+ ;;; SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE ;;
26
+ ;;; FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES ;;
27
+ ;;; WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN ;;
28
+ ;;; AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ;;
29
+ ;;; ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF ;;
30
+ ;;; THIS SOFTWARE. ;;
31
+ ;;; ;;
32
+ ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
33
+ ;;;
34
+ ;;; Basic Intonation modules. These call appropriate sub-modules
35
+ ;;; depending on the chosen intonation methods
36
+ ;;;
37
+ ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
38
+
39
+ ;;; These modules should predict intonation events/labels
40
+ ;;; based on information in the phrase and word streams
41
+
42
+ ; to detect prespecified accents (feature "accent" in 'Word relation)
43
+ ; AS 5/29/00
44
+
45
(define (tobi_accent_prespecified utt)
  ;; Returns t as soon as some item of UTT's Word relation either carries
  ;; an "accent" feature itself or has a Token-relation parent that does;
  ;; returns nil when no word qualifies.
  (let ((found nil)
        (remaining (utt.relation.items utt 'Word))
        (word nil)
        (token nil))
    (while (and remaining (not found))
      (set! word (car remaining))
      (cond
       ;; accent given directly on the word
       ((item.feat.present word 'accent)
        (set! found t))
       (t
        ;; otherwise look at the word's parent in the Token relation
        (set! token (item.parent (item.relation word 'Token)))
        (if (and (not (null token))
                 (item.feat.present token 'accent))
            (set! found t)
            (set! remaining (cdr remaining))))))
    found))
60
+
61
+ (set! int_accent_cart_tree_no_accent
62
+ '((NONE)))
63
+
64
(define (Intonation utt)
"(Intonation utt)
Select between different intonation modules depending on the Parameter
Int_Method.  Currently offers three types: Simple, hats on each content
word; ToBI, a tree method for predicting ToBI accents; and Default a
really bad method with a simple downward sloping F0.  This is the first
of a two-stage intonation prediction process.  This adds accent-like
features to syllables, the second, Int_Targets generates the F0 contour
itself. [see Intonation]"

  ; AS 5/29/00: Hack to avoid prediction of further accent labels
  ; on utterance chunks that have already been annotated with
  ; accent labels
  ; use CART that doesn't assign any labels when using Intonation_Tree

  (if (tobi_accent_prespecified utt)
      (let ((result nil))
        ;; Temporarily swap in the tree that assigns no accents.  The
        ;; saved tree is kept in the same global the original code used.
        (set! int_accent_cart_tree_save int_accent_cart_tree)
        (set! int_accent_cart_tree int_accent_cart_tree_no_accent)
        (set! result (Intonation_Tree utt))
        (set! int_accent_cart_tree int_accent_cart_tree_save)
        ;; Return what Intonation_Tree produced rather than the value of
        ;; the restoring set! (which was the CART tree, not an utterance).
        result)

      (let ((rval (apply_method 'Int_Method utt)))
        (cond
         (rval rval) ;; new style
         ((eq 'Simple (Parameter.get 'Int_Method))
          (Intonation_Simple utt))
         ((eq 'ToBI (Parameter.get 'Int_Method))
          (format t "Using Intonation_Tree")
          (Intonation_Tree utt))
         ((eq 'General (Parameter.get 'Int_Method))
          (Intonation_Simple utt)) ;; yes this is a duplication
         (t
          (Intonation_Default utt))))))
99
+
100
+
101
+ ;;; These modules should create an actual F0 contour based on the
102
+ ;;; existing intonational events/labels etc
103
+ ;;; Specifically this is called after durations have been predicted
104
+
105
(define (Int_Targets utt)
"(Int_Targets utt)
Second stage of F0 prediction.  If the Int_Target_Method parameter names
a method that yields a value, that value is returned.  Otherwise the
Int_Method parameter chooses the generator: Simple uses a simple hat,
ToBI uses linear regression on ToBI markings, General uses the general
target method, and anything else falls back to a simple declining slope.
This stage deals with actual F0 values and durations, while the
Intonation stage only assigns accents (and boundary tones).
[see Intonation]"
  (let ((rval (apply_method 'Int_Target_Method utt)))
    (if rval
        rval ;; new style
        (let ((method (Parameter.get 'Int_Method)))
          (cond
           ((eq 'Simple method)
            (Int_Targets_Simple utt))
           ((eq 'ToBI method)
            (Int_Targets_LR utt))
           ((eq 'General method)
            (Int_Targets_General utt))
           (t
            (Int_Targets_Default utt)))))))
124
+
125
+ ;;;
126
+ ;;; A tree that adds accents (H) to all content words
127
+ ;;; simple but better than nothing at all
128
+ ;;;
129
+ (set! simple_accent_cart_tree
130
+ '
131
+ ((R:SylStructure.parent.gpos is content)
132
+ ((stress is 1)
133
+ ((Accented))
134
+ ((position_type is single)
135
+ ((Accented))
136
+ ((NONE))))
137
+ ((NONE))))
138
+
139
+ (defvar duffint_params '((start 130) (end 110))
140
+ "duffint_params
141
+ Default parameters for Default (duff) intonation target generation.
142
+ This is an assoc list of parameters. Two parameters are supported
143
+ start specifies the start F0 in Hertz for an utterance, and end specifies
144
+ the end.")
145
+
146
+ ;;;
147
+ ;;; For simple testing, this function adds fixed duration and
148
+ ;;; monotone intonation to a set of phones
149
+ ;;;
150
+ (defvar FP_F0 120
151
+ "FP_F0
152
+ In using Fixed_Prosody as used in Phones type utterances and hence
153
+ SayPhones, this is the value in Hertz for the monotone F0.")
154
+ (defvar FP_duration 100
155
+ "FP_duration
156
+ In using Fixed_Prosody as used in Phones type utterances and hence
157
+ SayPhones, this is the fix value in ms for phone durations.")
158
+
159
(define (Fixed_Prosody utt)
"(Fixed_Prosody UTT)
Add fixed duration and fixed monotone F0 to the segments in UTT.
Uses values of FP_duration and FP_F0 as fixed values."
  (let ((saved_stretch (Parameter.get 'Duration_Stretch))
        (saved_duffint duffint_params)
        (result nil))
    ;; Temporarily override the stretch and the default F0 end points
    (Parameter.set 'Duration_Stretch (/ FP_duration 100.0))
    (set! duffint_params (list (list 'start FP_F0) (list 'end FP_F0)))

    ;; Default durations first, then default targets on that result
    (set! result (Int_Targets_Default (Duration_Default utt)))

    ;; Put the previous settings back
    (Parameter.set 'Duration_Stretch saved_stretch)
    (set! duffint_params saved_duffint)

    result))
179
+
180
(define (segment_dpitch seg)
"(segment_dpitch SEG)
Returns delta pitch, this pitch minus previous pitch."
  ;; item.feat takes the item and a feature name; the earlier body also
  ;; passed `utt', which is not a parameter of this function.
  (-
   (parse-number (item.feat seg 'seg_pitch))
   (parse-number (item.feat seg 'R:Segment.p.seg_pitch))))
186
+
187
+ (provide 'intonation)
CosyVoice-ttsfrd/resource/festival/java.scm ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2
+ ;;; ;;
3
+ ;;; Centre for Speech Technology Research ;;
4
+ ;;; University of Edinburgh, UK ;;
5
+ ;;; Copyright (c) 1998 ;;
6
+ ;;; All Rights Reserved. ;;
7
+ ;;; ;;
8
+ ;;; Permission is hereby granted, free of charge, to use and distribute ;;
9
+ ;;; this software and its documentation without restriction, including ;;
10
+ ;;; without limitation the rights to use, copy, modify, merge, publish, ;;
11
+ ;;; distribute, sublicense, and/or sell copies of this work, and to ;;
12
+ ;;; permit persons to whom this work is furnished to do so, subject to ;;
13
+ ;;; the following conditions: ;;
14
+ ;;; 1. The code must retain the above copyright notice, this list of ;;
15
+ ;;; conditions and the following disclaimer. ;;
16
+ ;;; 2. Any modifications must be clearly marked as such. ;;
17
+ ;;; 3. Original authors' names are not deleted. ;;
18
+ ;;; 4. The authors' names are not used to endorse or promote products ;;
19
+ ;;; derived from this software without specific prior written ;;
20
+ ;;; permission. ;;
21
+ ;;; ;;
22
+ ;;; THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK ;;
23
+ ;;; DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ;;
24
+ ;;; ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT ;;
25
+ ;;; SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE ;;
26
+ ;;; FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES ;;
27
+ ;;; WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN ;;
28
+ ;;; AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ;;
29
+ ;;; ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF ;;
30
+ ;;; THIS SOFTWARE. ;;
31
+ ;;; ;;
32
+ ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
33
+ ;;;
34
+ ;;; Functions specific to supporting a Java client
35
+ ;;;
36
+
37
+ ;; none required yet
38
+
39
+ (provide 'java)
CosyVoice-ttsfrd/resource/festival/klatt_durs.scm ADDED
@@ -0,0 +1,85 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2
+ ;;; ;;
3
+ ;;; Centre for Speech Technology Research ;;
4
+ ;;; University of Edinburgh, UK ;;
5
+ ;;; Copyright (c) 1996,1997 ;;
6
+ ;;; All Rights Reserved. ;;
7
+ ;;; ;;
8
+ ;;; Permission is hereby granted, free of charge, to use and distribute ;;
9
+ ;;; this software and its documentation without restriction, including ;;
10
+ ;;; without limitation the rights to use, copy, modify, merge, publish, ;;
11
+ ;;; distribute, sublicense, and/or sell copies of this work, and to ;;
12
+ ;;; permit persons to whom this work is furnished to do so, subject to ;;
13
+ ;;; the following conditions: ;;
14
+ ;;; 1. The code must retain the above copyright notice, this list of ;;
15
+ ;;; conditions and the following disclaimer. ;;
16
+ ;;; 2. Any modifications must be clearly marked as such. ;;
17
+ ;;; 3. Original authors' names are not deleted. ;;
18
+ ;;; 4. The authors' names are not used to endorse or promote products ;;
19
+ ;;; derived from this software without specific prior written ;;
20
+ ;;; permission. ;;
21
+ ;;; ;;
22
+ ;;; THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK ;;
23
+ ;;; DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ;;
24
+ ;;; ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT ;;
25
+ ;;; SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE ;;
26
+ ;;; FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES ;;
27
+ ;;; WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN ;;
28
+ ;;; AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ;;
29
+ ;;; ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF ;;
30
+ ;;; THIS SOFTWARE. ;;
31
+ ;;; ;;
32
+ ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
33
+ ;;;
34
+ ;;; Phone duration info for Klatt rules, for mrpa phone set
35
+
36
;;; Per-phone parameter table for the Klatt duration rules (mrpa phone set).
;;; Each entry has the form (PHONE NUMBER NUMBER).
;;; NOTE(review): the two numbers are presumably the inherent and the
;;; minimum duration of the phone in milliseconds -- confirm against the
;;; Klatt duration module that consumes this table.
(set! duration_klatt_params
'(
(a 230.0 80.0)
(aa 240.0 100.0)
(@ 120.0 60.0)
(@@ 180.0 80.0)
(ai 250.0 150.0)
(au 240.0 100.0)
(b 85.0 60.0)
(ch 70.0 50.0)
(d 75.0 50.0)
(dh 50.0 30.0)
(e 150.0 70.0)
(e@ 270.0 130.0)
(ei 180.0 100.0)
(f 100.0 80.0)
(g 80.0 60.0)
(h 80.0 20.0)
(i 135.0 40.0)
(i@ 230.0 100.0)
(ii 155.0 55)
(jh 70.0 50.0)
(k 80.0 60.0)
(l 80.0 40.0)
(m 70.0 60.0)
(n 60.0 50.0)
(ng 95.0 60.0)
(o 240.0 130.0)
(oi 280.0 150.0)
(oo 240.0 130.0)
(ou 220.0 80.0)
(p 90.0 50.0)
(r 80.0 30.0)
(s 105.0 60.0)
(sh 105.0 80.0)
(t 75.0 50.0)
(th 90.0 60.0)
(u 210.0 70.0)
(u@ 230.0 110.0)
(uh 160.0 60.0)
(uu 230.0 150.0)
(v 60.0 40.0)
(w 80.0 60.0)
(y 80.0 40.0)
(z 75.0 40.0)
(zh 70.0 40.0)
(# 100.0 100.0)
))
84
+
85
+ (provide 'klatt_durs)
CosyVoice-ttsfrd/resource/festival/languages.scm ADDED
@@ -0,0 +1,122 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2
+ ;;; ;;
3
+ ;;; Centre for Speech Technology Research ;;
4
+ ;;; University of Edinburgh, UK ;;
5
+ ;;; Copyright (c) 1996,1997 ;;
6
+ ;;; All Rights Reserved. ;;
7
+ ;;; ;;
8
+ ;;; Permission is hereby granted, free of charge, to use and distribute ;;
9
+ ;;; this software and its documentation without restriction, including ;;
10
+ ;;; without limitation the rights to use, copy, modify, merge, publish, ;;
11
+ ;;; distribute, sublicense, and/or sell copies of this work, and to ;;
12
+ ;;; permit persons to whom this work is furnished to do so, subject to ;;
13
+ ;;; the following conditions: ;;
14
+ ;;; 1. The code must retain the above copyright notice, this list of ;;
15
+ ;;; conditions and the following disclaimer. ;;
16
+ ;;; 2. Any modifications must be clearly marked as such. ;;
17
+ ;;; 3. Original authors' names are not deleted. ;;
18
+ ;;; 4. The authors' names are not used to endorse or promote products ;;
19
+ ;;; derived from this software without specific prior written ;;
20
+ ;;; permission. ;;
21
+ ;;; ;;
22
+ ;;; THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK ;;
23
+ ;;; DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ;;
24
+ ;;; ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT ;;
25
+ ;;; SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE ;;
26
+ ;;; FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES ;;
27
+ ;;; WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN ;;
28
+ ;;; AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ;;
29
+ ;;; ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF ;;
30
+ ;;; THIS SOFTWARE. ;;
31
+ ;;; ;;
32
+ ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
33
+ ;;;
34
+ ;;; Specification of voices and some major choices of synthesis
35
+ ;;;
36
+ ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
37
+ ;;;
38
+ ;;; This should use some sort of database description for voices so
39
+ ;;; new voices will become automatically available.
40
+ ;;;
41
+
42
(define (language_british_english)
"(language_british_english)
Select British English: bind the male voice aliases to whichever British
diphone voices are available, activate male1 and set the Language
parameter to britishenglish."
  (require 'voices)

  ;; rab is assumed to be installed and is always the primary voice.
  (set! male1 voice_rab_diphone)

  ;; The remaining voices are optional; an alias is only bound when the
  ;; corresponding voice function exists.
  (if (symbol-bound? 'voice_don_diphone) (set! male2 voice_don_diphone))
  (if (symbol-bound? 'voice_gsw_diphone) (set! male3 voice_gsw_diphone))
  (if (symbol-bound? 'voice_gsw_450)     (set! male4 voice_gsw_450))

  ;; Activate the primary voice, then record the language choice.
  (male1)
  (Parameter.set 'Language 'britishenglish))
59
+
60
(define (language_american_english)
"(language_american_english)
Set up language parameters for American English."
  (require 'voices)
  ;; kal is the only voice referenced here, so when it is available it
  ;; backs both the female1 and male1 aliases (one test instead of two
  ;; identical ones).
  (if (symbol-bound? 'voice_kal_diphone)
      (begin
        (set! female1 voice_kal_diphone)
        (set! male1 voice_kal_diphone)))

  ;; Activate whatever male1 is bound to and record the language choice.
  (male1)
  (Parameter.set 'Language 'americanenglish))
72
+
73
(define (language_scots_gaelic)
"(language_scots_gaelic)
Placeholder for Scots Gaelic.  No voice is available, so this always
signals an error and never changes the Language parameter."
  ;; The former (Parameter.set 'Language 'scotsgaelic) that followed this
  ;; call could never run, because error does not return; it has been
  ;; dropped.  Reinstate it once a Scots Gaelic voice is set up here.
  (error "Scots Gaelic not yet supported."))
80
+
81
(define (language_welsh)
"(language_welsh)
Select Welsh: make voice_welsh_hl the male1 voice, activate it and set
the Language parameter to welsh."
  ;; Bind the alias first so that male1 refers to the Welsh voice ...
  (set! male1 voice_welsh_hl)
  ;; ... then activate it through the alias and record the language.
  (male1)
  (Parameter.set 'Language 'welsh))
90
+
91
(define (language_castillian_spanish)
"(language_castillian_spanish)
Set up language parameters for Castillian Spanish."
  ;; Activate the Spanish diphone voice directly, then expose it under
  ;; the male1 alias as well.
  (voice_el_diphone)
  (set! male1 voice_el_diphone)

  (Parameter.set 'Language 'spanish))
100
+
101
(define (select_language language)
"(select_language LANGUAGE)
Call the language setup function matching the symbol LANGUAGE, one of
britishenglish, english, americanenglish, scotsgaelic, welsh, spanish
or klingon.  Any other value, or klingon when no language_klingon
function has been defined, prints a warning and falls back to British
English."
  (cond
   ;; plain 'english is treated as British English
   ((or (equal? language 'britishenglish)
        (equal? language 'english))
    (language_british_english))
   ((equal? language 'americanenglish)
    (language_american_english))
   ((equal? language 'scotsgaelic)
    (language_scots_gaelic))
   ((equal? language 'welsh)
    (language_welsh))
   ((equal? language 'spanish)
    (language_castillian_spanish))
   ;; language_klingon is not defined in this file; only dispatch to it
   ;; when some other module has provided it, otherwise fall through to
   ;; the unsupported-language branch instead of raising an unbound error.
   ((and (equal? language 'klingon)
         (symbol-bound? 'language_klingon))
    (language_klingon))
   (t
    (print "Unsupported language, using English")
    (language_british_english))))
119
+
120
;; Holds the British English setup function as the initial value; being a
;; defvar, a value bound before this file is loaded is left untouched.
;; NOTE(review): presumably called at start-up to pick the default
;; language -- confirm against the code that reads language_default.
(defvar language_default language_british_english)
121
+
122
+ (provide 'languages)
CosyVoice-ttsfrd/resource/festival/lexicons.scm ADDED
@@ -0,0 +1,274 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2
+ ;;; ;;
3
+ ;;; Centre for Speech Technology Research ;;
4
+ ;;; University of Edinburgh, UK ;;
5
+ ;;; Copyright (c) 1996,1997 ;;
6
+ ;;; All Rights Reserved. ;;
7
+ ;;; ;;
8
+ ;;; Permission is hereby granted, free of charge, to use and distribute ;;
9
+ ;;; this software and its documentation without restriction, including ;;
10
+ ;;; without limitation the rights to use, copy, modify, merge, publish, ;;
11
+ ;;; distribute, sublicense, and/or sell copies of this work, and to ;;
12
+ ;;; permit persons to whom this work is furnished to do so, subject to ;;
13
+ ;;; the following conditions: ;;
14
+ ;;; 1. The code must retain the above copyright notice, this list of ;;
15
+ ;;; conditions and the following disclaimer. ;;
16
+ ;;; 2. Any modifications must be clearly marked as such. ;;
17
+ ;;; 3. Original authors' names are not deleted. ;;
18
+ ;;; 4. The authors' names are not used to endorse or promote products ;;
19
+ ;;; derived from this software without specific prior written ;;
20
+ ;;; permission. ;;
21
+ ;;; ;;
22
+ ;;; THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK ;;
23
+ ;;; DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ;;
24
+ ;;; ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT ;;
25
+ ;;; SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE ;;
26
+ ;;; FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES ;;
27
+ ;;; WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN ;;
28
+ ;;; AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ;;
29
+ ;;; ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF ;;
30
+ ;;; THIS SOFTWARE. ;;
31
+ ;;; ;;
32
+ ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
33
+ ;;;
34
+ ;;; Definition of various lexicons
35
+ ;;;
36
+ ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
37
+
38
;;; If there exists a subdirectory of the lib-path called dicts then that
;;; is used as the lexicon directory by default.  If it doesn't exist
;;; we set lexdir to the directory in CSTR where our lexicons are.
;;; In non-CSTR installations where lexicons are not in lib/dicts,
;;; you should set lexdir in sitevars.scm

(defvar lexdir
  (if (probe_file (path-append libdir "dicts"))
      (path-append libdir "dicts/")
      ;; else we'll guess we're in the CSTR filespace
      (path-as-directory "/projects/festival/lib/dicts/"))
  "lexdir
The directory where the lexicon(s) are, by default.")
51
+
52
+ (require 'pos) ;; for part of speech mapping
53
+
54
(define (setup_cstr_lex)
"(setup_cstr_lex)
Define and setup the CSTR lexicon.  The CSTR lexicon consists
of about 25,000 entries in the mrpa phone set.  A large number of
specific local entries are also added to the addenda.  The lexicon is
registered under the name \"mrpa\" and is only created if no lexicon of
that name exists yet."
  (if (not (member_string "mrpa" (lex.list)))
      (begin
        (lex.create "mrpa")
        (lex.set.compile.file (path-append lexdir "cstrlex.out"))
        (lex.set.phoneset "mrpa")
        (lex.set.lts.method 'lts_rules)
        (lex.set.lts.ruleset 'nrl)
        (lex.set.pos.map english_pos_map_wp39_to_wp20)
        ;; shared mrpa addenda first, then a few entries specific to
        ;; this lexicon
        (mrpa_addenda)
        (lex.add.entry
         '("previous" nil (((p r ii) 1) ((v ii) 0) ((@ s) 0))))
        (lex.add.entry
         '("audio" () (((oo d) 1) ((ii) 0) ((ou) 0))))
        (lex.add.entry
         '("modules" () (((m o d) 1) ((uu l s) 0)))))))
75
+
76
(define (setup_oald_lex)
"(setup_oald_lex)
Define and setup the CUVOALD lexicon.  This is derived from the
Computer Users Version of the Oxford Advanced Learners' Dictionary
of Current English.  This version includes a trained set of letter
to sound rules which have also been used to reduce the actual lexicon
size by over half, for those entries that the lts model gets exactly
the same."
  ;; The lexicon definition lives in its own file under lexdir; load it
  ;; only if no lexicon named "oald" has been created yet.
  (if (not (member_string "oald" (lex.list)))
      (load (path-append lexdir "oald/oaldlex.scm"))))
86
+
87
(define (setup_cmu_lex)
"(setup_cmu_lex)
Set up the lexicon derived from the CMU lexicon (cmudict-0.4), around
100,000 entries in the radio phoneset (sort of darpa-like).  It comes
with a letter to sound rule model trained from this data and uses the
lexical stress predictor from OALD."
  (let ((already_defined (member_string "cmu" (lex.list))))
    ;; The definition file is loaded only the first time round.
    (if (not already_defined)
        (load (path-append lexdir "cmu/cmulex.scm")))))
95
+
96
(define (setup_cmumt_lex)
"(setup_cmumt_lex)
Set up the \"cmumt\" lexicon, derived from the CMU lexicon (cmudict-0.4),
around 100,000 entries in the radio phoneset (sort of darpa-like).  It
comes with a letter to sound rule model trained from this data and uses
the lexical stress predictor from OALD."
  (let ((already_defined (member_string "cmumt" (lex.list))))
    ;; The definition file is loaded only the first time round.
    (if (not already_defined)
        (load (path-append lexdir "cmu_mt/cmumtlex.scm")))))
104
+
105
(define (setup_cmu6_lex)
"(setup_cmu6_lex)
Set up the lexicon derived from the CMU lexicon (cmudict-0.6), around
100,000 entries in the radio phoneset (sort of darpa-like).  It comes
with a letter to sound rule model trained from this data.  The format
of this lexicon is suitable for the UniSyn metrical phonology modules,
that is, the entries are not syllabified."
  (let ((already_defined (member_string "cmu6" (lex.list))))
    ;; The definition file is loaded only the first time round.
    (if (not already_defined)
        (load (path-append lexdir "cmu6/cmu6lex.scm")))))
114
+
115
(define (setup_moby_lex)
"(setup_moby_lex)
Define and setup the MOBY lexicon.  This is derived from the public
domain version of the Moby (TM) Pronunciator II lexicon.  It can be
converted automatically to British English mrpa phoneset which of
course is sub-optimal.  It contains around 120,000 entries and has part
of speech information for homographs.  The compiled lexicon is taken
from lexdir when mobylex.out exists there, otherwise from the historical
hard-coded location."
  (if (not (member_string "moby" (lex.list)))
      (let ((local_lex (path-append lexdir "mobylex.out")))
        (lex.create "moby")
        ;; Prefer a copy installed with the other lexicons; keep the
        ;; original absolute path as a fallback so existing set-ups that
        ;; rely on it continue to work.
        (lex.set.compile.file
         (if (probe_file local_lex)
             local_lex
             "/home/awb/src/mobypron/mobylex.out"))
        (lex.set.phoneset "mrpa")
        (lex.set.lts.method 'lts_rules)
        (lex.set.lts.ruleset 'nrl)
        (lex.set.pos.map english_pos_map_wp39_to_wp20)
        ;; a few explicit entries, followed by the shared mrpa addenda
        (lex.add.entry
         '("a" dt (((@) 0))))
        (lex.add.entry
         '("the" dt (((dh @) 0))))
        (lex.add.entry
         '("taylor" n (((t ei) 1) ((l @) 0))))
        (lex.add.entry
         '("who" prp ((( h uu ) 0))))
        (mrpa_addenda))))
140
+
141
(define (setup_beep_lex)
"(setup_beep_lex)
Lexicon derived from the British English Example Pronunciation dictionary
(BEEP) from Tony Robinson.  Around 160,000 entries."
  ;; NOTE(review): the docstring used to carry a contact e-mail address
  ;; that is obfuscated in this copy of the file; restore it from the
  ;; upstream Festival sources if it is wanted.
  (if (not (member_string "beep" (lex.list)))
      (begin
        (lex.create "beep")
        (lex.set.compile.file (path-append lexdir "beep_lex.out"))
        (lex.set.phoneset "mrpa")
        (lex.set.lts.method 'lts_rules)
        (lex.set.lts.ruleset 'nrl)
        (lex.set.pos.map english_pos_map_wp39_to_wp20)
        ;; one explicit entry, followed by the shared mrpa addenda
        (lex.add.entry
         '("taylor" nil (((t ei) 1) ((l @) 0))))
        (mrpa_addenda))))
156
+
157
;;; The nrl letter to sound rules produce mrpa phone set so they cannot
;;; be used directly for American English.  No mapping is implemented
;;; here: f2b_lts is a stub that gives every word the same pronunciation.
(define (f2b_lts word features)
"(f2b_lts WORD FEATURES)
Letter to sound function for f2b (American English).  WORD and FEATURES
are ignored; a fixed lexical entry for the word \"unknown\" is returned."
  '("unknown" nil (((ah n) 0) ((n ow n) 1))))
165
+
166
;;; A CART tree for predicting lexical stress for strings of phones
;;; generated by the LTS models.  This was actually trained from
;;; OALD as that's the only lexicon with stress and part of speech
;;; information.  It was trained in a phoneset-independent way and may be
;;; used by either OALD or CMU models (and probably MOBY and OGI lex too).
;;; On held out data it gets
;;;          0     1   total
;;;    0  7390   378    7768   [7390/7768]   95.134
;;;    1   512  8207    8719   [8207/8719]   94.128
;;;       7902  8585
;;;   total 16487 correct 15597.000 94.602%
;;;
;;; Each internal node is (QUESTION SUBTREE SUBTREE) and each leaf is
;;; ((0)) or ((1)).
;;; NOTE(review): presumably the first subtree is taken when QUESTION
;;; holds and the leaves mean unstressed/stressed -- confirm against the
;;; CART interpreter that consumes this tree.
(set! english_stress_tree
 '((sylpos < 1.7)
  ((1))
  ((ph_vlng is a)
   ((0))
   ((ph_vheight is 1)
    ((num2end < 1.5)
     ((ph_vfront is 1)
      ((ph_vlng is s) ((0)) ((pos is v) ((1)) ((0))))
      ((pos is n) ((0)) ((sylpos < 2.2) ((1)) ((0)))))
     ((ph_vlng is l)
      ((1))
      ((ph_vfront is 1)
       ((num2end < 2.4)
        ((0))
        ((pos is a)
         ((num2end < 3.3) ((sylpos < 2.3) ((1)) ((0))) ((0)))
         ((sylpos < 3.2)
          ((num2end < 3.3) ((0)) ((pos is v) ((1)) ((0))))
          ((0)))))
       ((0)))))
    ((num2end < 1.5)
     ((pos is n)
      ((0))
      ((sylpos < 2.4)
       ((pos is v)
        ((1))
        ((ph_vlng is d)
         ((ph_vheight is 2) ((ph_vfront is 1) ((1)) ((0))) ((0)))
         ((1))))
       ((ph_vlng is d)
        ((sylpos < 3.3)
         ((pos is v)
          ((ph_vheight is 2) ((ph_vfront is 1) ((0)) ((1))) ((0)))
          ((0)))
         ((0)))
        ((ph_vheight is 2)
         ((1))
         ((ph_vrnd is +) ((1)) ((ph_vlng is l) ((0)) ((1))))))))
     ((ph_vlng is d)
      ((pos is v)
       ((sylpos < 2.4) ((1)) ((0)))
       ((ph_vfront is 2)
        ((pos is n)
         ((num2end < 2.4)
          ((ph_vrnd is +)
           ((0))
           ((sylpos < 2.2) ((1)) ((ph_vheight is 2) ((1)) ((0)))))
          ((sylpos < 2.4) ((ph_vheight is 2) ((0)) ((1))) ((0))))
         ((1)))
        ((ph_vheight is 2) ((1)) ((ph_vfront is 1) ((0)) ((1))))))
      ((pos is n)
       ((num2end < 2.4)
        ((ph_vfront is 3)
         ((sylpos < 2.3) ((1)) ((ph_vlng is l) ((1)) ((0))))
         ((1)))
        ((1)))
       ((1)))))))))
235
+
236
(define (lex_user_unknown_word word feats)
"(lex_user_unknown_word WORD FEATS)
Called by the lexicon when the letter to sound method is set to
'function.  Users who want to handle unknown words themselves are
expected to redefine this function; this default definition only
signals an error."
  ;; WORD and FEATS are deliberately unused in the default version.
  (error "lex_user_unknown_word: has not been defined by user"))
242
+
243
(define (Word utt)
"(Word UTT)
Build the (synthesis specific) syllables and segments for UTT from its
Word relation, using the current lexicon and the configured module."
  ;; New style: a non-nil result from the Word_Method hook is returned
  ;; as is.  Otherwise fall back to the classic implementation.
  (or (apply_method 'Word_Method utt)
      (Classic_Word utt)))
252
+
253
(define (find_oovs vocab oovs)
"(find_oovs VOCAB OOVS)
Read entries from the file VOCAB and look each one up in the current
lexicon.  For every entry that lex.lookup_all does not find, the result
of lex.lookup for that entry is written to the file OOVS.  Finally a
summary line with the number of words read, the number out of
vocabulary and the OOV percentage is printed."
  (let ((fd (fopen vocab "r"))
        (ofd (fopen oovs "w"))
        (e 0)      ;; entries read so far
        (oov 0)    ;; entries not found in the lexicon
        (entry))

    (while (not (equal? (set! entry (readfp fd)) (eof-val)))
      (set! e (+ 1 e))
      (if (not (lex.lookup_all entry))
          (begin
            (set! oov (+ 1 oov))
            (format ofd "%l\n" (lex.lookup entry nil)))))

    ;; Close both files so that the OOV list is flushed to disk and the
    ;; file handles are not leaked.
    (fclose fd)
    (fclose ofd)

    (format t ";; %d words %d oov %2.2f oov_rate\n"
            e oov
            ;; an empty vocabulary file would otherwise divide by zero
            (if (> e 0)
                (/ (* oov 100.0) e)
                0.0))))
271
+
272
+
273
+ (provide 'lexicons)
274
+