ecker
/

vall-e

Model card Files Files and versions Community

mrq committed on Oct 26, 2024

Commit

f8eb280

1 Parent(s): 6542f44

Cleanup

Browse files

Files changed (12) hide show

loras/ckpt/lora-glados-r128-a128/lora.sft +0 -3
loras/ckpt/lora-max-r128-a128/lora.sft +0 -3
loras/ckpt/lora-sam-r128-a128/lora.sft +0 -3
loras/ckpt/lora-shodan-r128-a128/lora.sft +0 -3
loras/config.lora.glados.yaml +0 -130
loras/config.lora.max.yaml +0 -130
loras/config.lora.sam.yaml +0 -130
loras/config.lora.shodan.yaml +0 -130
loras/config.lora[delamain].yaml +26 -0
loras/config.lora[glados].yaml +26 -0
loras/config.lora[johnny].yaml +26 -0
loras/config.lora[sam].yaml +26 -0

loras/ckpt/lora-glados-r128-a128/lora.sft DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:3395ef66c721a404b6204af6e61f607199b97acf430cfb6151f7d926ca09196e
-size 66076304

loras/ckpt/lora-max-r128-a128/lora.sft DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:0e8065b17f981dba303af2e5c92275902a45e8e38e9eea578f0a9e4b50690f1e
-size 66076312

loras/ckpt/lora-sam-r128-a128/lora.sft DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:433ada1d6f9cba5ebce7917bb51df09f050f4d221afdc4cdad72964b6624f9fc
-size 66076312

loras/ckpt/lora-shodan-r128-a128/lora.sft DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:206cc3add30585858531744857613e2d6d80ddaa238357ae3f407e4cc15318da
-size 66076304

loras/config.lora.glados.yaml DELETED Viewed

@@ -1,130 +0,0 @@
-sample_rate: 24_000
-audio_backend: "vocos"
-models:
-- name: "ar+nar"
-  size: "full"
-  resp_levels: 8
-  prom_levels: 8
-  tasks: 9
-  langs: 4
-  tones: 1
-  arch_type: llama
-  training: True
-  version: 5
-  attention: auto
-  dropout: 0.1
-  #loss_factors:
-  #  text: 0.01
-  #  prom: 0.5
-  #  resp: 1.0
-  capabilities: ["ar", "nar"]
-  experimental:
-    p_rvq_levels: "auto"
-    audio_embedding_sums: True
-    unified_position_ids: False
-    split_classifiers: True
-    #
-    causal_size: 1
-    interleave: False
-    rvq_level_range: []
-    tie_classifier_to_embedding: False
-loras:
-- name : "lora-glados"
-  rank: 128
-  alpha: 128
-  training: True
-  rvq_levels: []
-hyperparameters:
-  batch_size: 32
-  gradient_accumulation_steps: 8
-  gradient_clipping: 1.0
-  warmup_steps: 10
-  optimizer: Prodigy
-  learning_rate: 1.0
-  torch_optimizer: True
-  scheduler: "" # ScheduleFree
-  torch_scheduler: True
-evaluation:
-  batch_size: 4
-  frequency: 250
-  size: 4
-  steps: 500
-  ar_temperature: 1.0
-  nar_temperature: 0.0
-trainer:
-  iterations: 1_000_000
-  save_frequency: 250
-  keep_last_checkpoints: 4
-  resize_modules: True
-  check_for_oom: False
-  gradient_checkpointing: True
-  weight_dtype: bfloat16
-  amp: True
-  backend: deepspeed
-  deepspeed:
-    inferencing: False
-    amp: False
-  load_webui: False
-inference:
-  backend: local
-  normalize: False
-  weight_dtype: bfloat16
-  amp: True
-optimizations:
-  injects: False
-  replace: True
-  linear: False
-  embedding: False
-  optimizers: True
-  bitsandbytes: False
-  dadaptation: False
-  bitnet: False
-  fp8: False
-dataset:
-  use_hdf5: True
-  hdf5_flag: r
-  use_metadata: True
-  validate: True
-  workers: 1
-  cache: True
-  duration_range: [3.0, 12.0]
-  random_utterance: 1.0
-  max_prompts: 1
-  prompt_duration_range: [3.0, 3.0]
-  max_resps: 1
-  p_resp_append: 0.25
-  sample_type: path # path # speaker
-  sample_order: duration
-  sample_max_duration_batch: 300
-  sample_shuffle: False
-  tasks_list: [ "tts", "stt" ]
-  training: []
-  validation: []
-  noise: []

loras/config.lora.max.yaml DELETED Viewed

@@ -1,130 +0,0 @@
-sample_rate: 24_000
-audio_backend: "vocos"
-models:
-- name: "ar+nar-tts+stt"
-  size: "full"
-  resp_levels: 8
-  prom_levels: 8
-  tasks: 9
-  langs: 2
-  tones: 1
-  arch_type: llama
-  training: False
-  version: 5
-  attention: auto
-  dropout: 0.1
-  #loss_factors:
-  #  text: 0.01
-  #  prom: 0.5
-  #  resp: 1.0
-  capabilities: ["ar", "nar"]
-  experimental:
-    p_rvq_levels: "auto"
-    audio_embedding_sums: True
-    unified_position_ids: False
-    split_classifiers: True
-    #
-    causal_size: 1
-    interleave: False
-    rvq_level_range: []
-    tie_classifier_to_embedding: False
-loras:
-- name : "lora-max"
-  rank: 128
-  alpha: 128
-  training: True
-  rvq_levels: []
-hyperparameters:
-  batch_size: 32
-  gradient_accumulation_steps: 8
-  gradient_clipping: 1.0
-  warmup_steps: 10
-  optimizer: Prodigy
-  learning_rate: 1.0
-  torch_optimizer: True
-  scheduler: "" # ScheduleFree
-  torch_scheduler: True
-evaluation:
-  batch_size: 4
-  frequency: 250
-  size: 4
-  steps: 500
-  ar_temperature: 1.0
-  nar_temperature: 0.0
-trainer:
-  iterations: 1_000_000
-  save_frequency: 250
-  keep_last_checkpoints: 4
-  resize_modules: True
-  check_for_oom: False
-  gradient_checkpointing: True
-  weight_dtype: bfloat16
-  amp: True
-  backend: deepspeed
-  deepspeed:
-    inferencing: False
-    amp: False
-  load_webui: False
-inference:
-  backend: local
-  normalize: False
-  weight_dtype: bfloat16
-  amp: True
-optimizations:
-  injects: False
-  replace: True
-  linear: False
-  embedding: False
-  optimizers: True
-  bitsandbytes: False
-  dadaptation: False
-  bitnet: False
-  fp8: False
-dataset:
-  use_hdf5: True
-  hdf5_flag: r
-  use_metadata: True
-  validate: True
-  workers: 1
-  cache: True
-  duration_range: [3.0, 12.0]
-  random_utterance: 1.0
-  max_prompts: 1
-  prompt_duration_range: [3.0, 3.0]
-  max_resps: 1
-  p_resp_append: 0.25
-  sample_type: path # path # speaker
-  sample_order: duration
-  sample_max_duration_batch: 300
-  sample_shuffle: False
-  tasks_list: [ "tts", "stt" ]
-  training: []
-  validation: []
-  noise: []

loras/config.lora.sam.yaml DELETED Viewed

@@ -1,130 +0,0 @@
-sample_rate: 24_000
-audio_backend: "vocos"
-models:
-- name: "ar+nar-tts+stt"
-  size: "full"
-  resp_levels: 8
-  prom_levels: 8
-  tasks: 9
-  langs: 2
-  tones: 1
-  arch_type: llama
-  training: False
-  version: 5
-  attention: auto
-  dropout: 0.1
-  #loss_factors:
-  #  text: 0.01
-  #  prom: 0.5
-  #  resp: 1.0
-  capabilities: ["ar", "nar"]
-  experimental:
-    p_rvq_levels: "auto"
-    audio_embedding_sums: True
-    unified_position_ids: False
-    split_classifiers: True
-    #
-    causal_size: 1
-    interleave: False
-    rvq_level_range: []
-    tie_classifier_to_embedding: False
-loras:
-- name : "lora-sam"
-  rank: 128
-  alpha: 128
-  training: True
-  rvq_levels: []
-hyperparameters:
-  batch_size: 32
-  gradient_accumulation_steps: 8
-  gradient_clipping: 1.0
-  warmup_steps: 10
-  optimizer: Prodigy
-  learning_rate: 1.0
-  torch_optimizer: True
-  scheduler: "" # ScheduleFree
-  torch_scheduler: True
-evaluation:
-  batch_size: 4
-  frequency: 250
-  size: 4
-  steps: 500
-  ar_temperature: 1.0
-  nar_temperature: 0.0
-trainer:
-  iterations: 1_000_000
-  save_frequency: 250
-  keep_last_checkpoints: 4
-  resize_modules: True
-  check_for_oom: False
-  gradient_checkpointing: True
-  weight_dtype: bfloat16
-  amp: True
-  backend: deepspeed
-  deepspeed:
-    inferencing: False
-    amp: False
-  load_webui: False
-inference:
-  backend: local
-  normalize: False
-  weight_dtype: bfloat16
-  amp: True
-optimizations:
-  injects: False
-  replace: True
-  linear: False
-  embedding: False
-  optimizers: True
-  bitsandbytes: False
-  dadaptation: False
-  bitnet: False
-  fp8: False
-dataset:
-  use_hdf5: True
-  hdf5_flag: r
-  use_metadata: True
-  validate: True
-  workers: 1
-  cache: True
-  duration_range: [3.0, 12.0]
-  random_utterance: 1.0
-  max_prompts: 1
-  prompt_duration_range: [3.0, 3.0]
-  max_resps: 1
-  p_resp_append: 0.25
-  sample_type: path # path # speaker
-  sample_order: duration
-  sample_max_duration_batch: 300
-  sample_shuffle: False
-  tasks_list: [ "tts", "stt" ]
-  training: []
-  validation: []
-  noise: []

loras/config.lora.shodan.yaml DELETED Viewed

@@ -1,130 +0,0 @@
-sample_rate: 24_000
-audio_backend: "vocos"
-models:
-- name: "ar+nar"
-  size: "full"
-  resp_levels: 8
-  prom_levels: 8
-  tasks: 9
-  langs: 4
-  tones: 1
-  arch_type: llama
-  training: True
-  version: 5
-  attention: auto
-  dropout: 0.1
-  #loss_factors:
-  #  text: 0.01
-  #  prom: 0.5
-  #  resp: 1.0
-  capabilities: ["ar", "nar"]
-  experimental:
-    p_rvq_levels: "auto"
-    audio_embedding_sums: True
-    unified_position_ids: False
-    split_classifiers: True
-    #
-    causal_size: 1
-    interleave: False
-    rvq_level_range: []
-    tie_classifier_to_embedding: False
-loras:
-- name : "lora-shodan"
-  rank: 128
-  alpha: 128
-  training: True
-  rvq_levels: []
-hyperparameters:
-  batch_size: 32
-  gradient_accumulation_steps: 8
-  gradient_clipping: 1.0
-  warmup_steps: 10
-  optimizer: Prodigy
-  learning_rate: 1.0
-  torch_optimizer: True
-  scheduler: "" # ScheduleFree
-  torch_scheduler: True
-evaluation:
-  batch_size: 4
-  frequency: 250
-  size: 4
-  steps: 500
-  ar_temperature: 1.0
-  nar_temperature: 0.0
-trainer:
-  iterations: 1_000_000
-  save_frequency: 250
-  keep_last_checkpoints: 4
-  resize_modules: True
-  check_for_oom: False
-  gradient_checkpointing: True
-  weight_dtype: bfloat16
-  amp: True
-  backend: deepspeed
-  deepspeed:
-    inferencing: False
-    amp: False
-  load_webui: False
-inference:
-  backend: local
-  normalize: False
-  weight_dtype: bfloat16
-  amp: True
-optimizations:
-  injects: False
-  replace: True
-  linear: False
-  embedding: False
-  optimizers: True
-  bitsandbytes: False
-  dadaptation: False
-  bitnet: False
-  fp8: False
-dataset:
-  use_hdf5: True
-  hdf5_flag: r
-  use_metadata: True
-  validate: True
-  workers: 1
-  cache: True
-  duration_range: [3.0, 12.0]
-  random_utterance: 1.0
-  max_prompts: 1
-  prompt_duration_range: [3.0, 3.0]
-  max_resps: 1
-  p_resp_append: 0.25
-  sample_type: path # path # speaker
-  sample_order: duration
-  sample_max_duration_batch: 300
-  sample_shuffle: False
-  tasks_list: [ "tts", "stt" ]
-  training: []
-  validation: []
-  noise: []

loras/config.lora[delamain].yaml ADDED Viewed

@@ -0,0 +1,26 @@

+models:
+- name: "ar+nar"
+  size: "full"
+  resp_levels: 8
+  tasks: 9
+  langs: 4
+  tones: 1
+  arch_type: llama
+  attention: auto
+  version: 5
+  capabilities: ["ar", "nar"]
+  experimental:
+    split_classifiers: True
+    audio_embedding_sums: True
+    unified_position_ids: False
+    rvq_levels_p: [
+      0, 0, 0, 0, 0, 0, 0,
+      1, 2, 3, 4, 5, 6, 7
+    ]
+loras:
+- name : "lora-cyberpunk-delamain"
+  rank: 128
+  alpha: 128
+  rvq_levels: []

loras/config.lora[glados].yaml ADDED Viewed

@@ -0,0 +1,26 @@

+models:
+- name: "ar+nar"
+  size: "full"
+  resp_levels: 8
+  tasks: 9
+  langs: 4
+  tones: 1
+  arch_type: llama
+  attention: auto
+  version: 5
+  capabilities: ["ar", "nar"]
+  experimental:
+    split_classifiers: True
+    audio_embedding_sums: True
+    unified_position_ids: False
+    rvq_levels_p: [
+      0, 0, 0, 0, 0, 0, 0,
+      1, 2, 3, 4, 5, 6, 7
+    ]
+loras:
+- name : "lora-portal-glados"
+  rank: 128
+  alpha: 128
+  rvq_levels: []

loras/config.lora[johnny].yaml ADDED Viewed

@@ -0,0 +1,26 @@

+models:
+- name: "ar+nar"
+  size: "full"
+  resp_levels: 8
+  tasks: 9
+  langs: 4
+  tones: 1
+  arch_type: llama
+  attention: auto
+  version: 5
+  capabilities: ["ar", "nar"]
+  experimental:
+    split_classifiers: True
+    audio_embedding_sums: True
+    unified_position_ids: False
+    rvq_levels_p: [
+      0, 0, 0, 0, 0, 0, 0,
+      1, 2, 3, 4, 5, 6, 7
+    ]
+loras:
+- name : "lora-cyberpunk-silverhand"
+  rank: 128
+  alpha: 128
+  rvq_levels: []

loras/config.lora[sam].yaml ADDED Viewed

@@ -0,0 +1,26 @@

+models:
+- name: "ar+nar"
+  size: "full"
+  resp_levels: 8
+  tasks: 9
+  langs: 4
+  tones: 1
+  arch_type: llama
+  attention: auto
+  version: 5
+  capabilities: ["ar", "nar"]
+  experimental:
+    split_classifiers: True
+    audio_embedding_sums: True
+    unified_position_ids: False
+    rvq_levels_p: [
+      0, 0, 0, 0, 0, 0, 0,
+      1, 2, 3, 4, 5, 6, 7
+    ]
+loras:
+- name : "lora-samandmax-sam"
+  rank: 128
+  alpha: 128
+  rvq_levels: []