spark-tts committed on
Commit
e2f41b6
·
1 Parent(s): 23a5ae6

clean structure

Browse files
cli/SparkTTS.py CHANGED
@@ -17,9 +17,10 @@ import re
17
  import torch
18
  from pathlib import Path
19
  from transformers import AutoTokenizer, AutoModelForCausalLM
20
- from utils.file import load_config
21
- from models.audio_tokenizer import BiCodecTokenizer
22
- from utils.token_parser import TASK_TOKEN_MAP
 
23
 
24
 
25
  class SparkTTS:
 
17
  import torch
18
  from pathlib import Path
19
  from transformers import AutoTokenizer, AutoModelForCausalLM
20
+
21
+ from sparktts.utils.file import load_config
22
+ from sparktts.models.audio_tokenizer import BiCodecTokenizer
23
+ from sparktts.utils.token_parser import TASK_TOKEN_MAP
24
 
25
 
26
  class SparkTTS:
inference.py → cli/inference.py RENAMED
File without changes
example/infer.sh CHANGED
@@ -33,10 +33,10 @@ prompt_speech_path="example/prompt_audio.wav"
33
  # Change directory to the root directory
34
  cd "$root_dir" || exit
35
 
36
- source utils/parse_options.sh
37
 
38
  # Run inference for each JSON file
39
- python inference.py \
40
  --text "${text}" \
41
  --device "${device}" \
42
  --save_dir "${save_dir}" \
 
33
  # Change directory to the root directory
34
  cd "$root_dir" || exit
35
 
36
+ source sparktts/utils/parse_options.sh
37
 
38
  # Run inference for each JSON file
39
+ python -m cli.inference \
40
  --text "${text}" \
41
  --device "${device}" \
42
  --save_dir "${save_dir}" \
{models → sparktts/models}/audio_tokenizer.py RENAMED
@@ -21,9 +21,9 @@ from pathlib import Path
21
  from typing import Any, Dict, Tuple
22
  from transformers import Wav2Vec2FeatureExtractor, Wav2Vec2Model
23
 
24
- from utils.file import load_config
25
- from utils.audio import load_audio
26
- from models.bicodec import BiCodec
27
 
28
 
29
  class BiCodecTokenizer:
 
21
  from typing import Any, Dict, Tuple
22
  from transformers import Wav2Vec2FeatureExtractor, Wav2Vec2Model
23
 
24
+ from sparktts.utils.file import load_config
25
+ from sparktts.utils.audio import load_audio
26
+ from sparktts.models.bicodec import BiCodec
27
 
28
 
29
  class BiCodecTokenizer:
{models → sparktts/models}/bicodec.py RENAMED
@@ -20,12 +20,12 @@ from typing import Dict, Any
20
  from omegaconf import DictConfig
21
  from safetensors.torch import load_file
22
 
23
- from utils.file import load_config
24
- from modules.speaker.speaker_encoder import SpeakerEncoder
25
- from modules.encoder_decoder.feat_encoder import Encoder
26
- from modules.encoder_decoder.feat_decoder import Decoder
27
- from modules.encoder_decoder.wave_generator import WaveGenerator
28
- from modules.vq.factorized_vector_quantize import FactorizedVectorQuantize
29
 
30
 
31
  class BiCodec(nn.Module):
 
20
  from omegaconf import DictConfig
21
  from safetensors.torch import load_file
22
 
23
+ from sparktts.utils.file import load_config
24
+ from sparktts.modules.speaker.speaker_encoder import SpeakerEncoder
25
+ from sparktts.modules.encoder_decoder.feat_encoder import Encoder
26
+ from sparktts.modules.encoder_decoder.feat_decoder import Decoder
27
+ from sparktts.modules.encoder_decoder.wave_generator import WaveGenerator
28
+ from sparktts.modules.vq.factorized_vector_quantize import FactorizedVectorQuantize
29
 
30
 
31
  class BiCodec(nn.Module):
{modules → sparktts/modules}/blocks/layers.py RENAMED
File without changes
{modules → sparktts/modules}/blocks/samper.py RENAMED
File without changes
{modules → sparktts/modules}/blocks/vocos.py RENAMED
File without changes
{modules → sparktts/modules}/encoder_decoder/feat_decoder.py RENAMED
@@ -19,8 +19,8 @@ import torch.nn as nn
19
 
20
  from typing import List
21
 
22
- from modules.blocks.vocos import VocosBackbone
23
- from modules.blocks.samper import SamplingBlock
24
 
25
 
26
  class Decoder(nn.Module):
 
19
 
20
  from typing import List
21
 
22
+ from sparktts.modules.blocks.vocos import VocosBackbone
23
+ from sparktts.modules.blocks.samper import SamplingBlock
24
 
25
 
26
  class Decoder(nn.Module):
{modules → sparktts/modules}/encoder_decoder/feat_encoder.py RENAMED
@@ -19,8 +19,8 @@ import torch.nn as nn
19
 
20
  from typing import List
21
 
22
- from modules.blocks.vocos import VocosBackbone
23
- from modules.blocks.samper import SamplingBlock
24
 
25
 
26
  class Encoder(nn.Module):
 
19
 
20
  from typing import List
21
 
22
+ from sparktts.modules.blocks.vocos import VocosBackbone
23
+ from sparktts.modules.blocks.samper import SamplingBlock
24
 
25
 
26
  class Encoder(nn.Module):
{modules → sparktts/modules}/encoder_decoder/wave_generator.py RENAMED
@@ -17,7 +17,7 @@
17
 
18
  import torch.nn as nn
19
 
20
- from modules.blocks.layers import (
21
  Snake1d,
22
  WNConv1d,
23
  ResidualUnit,
 
17
 
18
  import torch.nn as nn
19
 
20
+ from sparktts.modules.blocks.layers import (
21
  Snake1d,
22
  WNConv1d,
23
  ResidualUnit,
{modules → sparktts/modules}/fsq/finite_scalar_quantization.py RENAMED
File without changes
{modules → sparktts/modules}/fsq/residual_fsq.py RENAMED
@@ -10,7 +10,7 @@ from torch.amp import autocast
10
  from einx import get_at
11
  from einops import rearrange, reduce, pack, unpack
12
 
13
- from modules.fsq.finite_scalar_quantization import FSQ
14
 
15
 
16
  def exists(val):
 
10
  from einx import get_at
11
  from einops import rearrange, reduce, pack, unpack
12
 
13
+ from sparktts.modules.fsq.finite_scalar_quantization import FSQ
14
 
15
 
16
  def exists(val):
{modules → sparktts/modules}/speaker/ecapa_tdnn.py RENAMED
@@ -22,7 +22,7 @@ import torch
22
  import torch.nn as nn
23
  import torch.nn.functional as F
24
 
25
- import modules.speaker.pooling_layers as pooling_layers
26
 
27
 
28
  class Res2Conv1dReluBn(nn.Module):
 
22
  import torch.nn as nn
23
  import torch.nn.functional as F
24
 
25
+ import sparktts.modules.speaker.pooling_layers as pooling_layers
26
 
27
 
28
  class Res2Conv1dReluBn(nn.Module):
{modules → sparktts/modules}/speaker/perceiver_encoder.py RENAMED
File without changes
{modules → sparktts/modules}/speaker/pooling_layers.py RENAMED
File without changes
{modules → sparktts/modules}/speaker/speaker_encoder.py RENAMED
@@ -17,9 +17,9 @@ import torch
17
  import torch.nn as nn
18
 
19
  from typing import List, Tuple
20
- from modules.fsq.residual_fsq import ResidualFSQ
21
- from modules.speaker.ecapa_tdnn import ECAPA_TDNN_GLOB_c512
22
- from modules.speaker.perceiver_encoder import PerceiverResampler
23
 
24
  """
25
  x-vector + d-vector
 
17
  import torch.nn as nn
18
 
19
  from typing import List, Tuple
20
+ from sparktts.modules.fsq.residual_fsq import ResidualFSQ
21
+ from sparktts.modules.speaker.ecapa_tdnn import ECAPA_TDNN_GLOB_c512
22
+ from sparktts.modules.speaker.perceiver_encoder import PerceiverResampler
23
 
24
  """
25
  x-vector + d-vector
{modules → sparktts/modules}/vq/factorized_vector_quantize.py RENAMED
File without changes
{utils → sparktts/utils}/__init__.py RENAMED
File without changes
{utils → sparktts/utils}/audio.py RENAMED
File without changes
{utils → sparktts/utils}/file.py RENAMED
File without changes
{utils → sparktts/utils}/parse_options.sh RENAMED
File without changes
{utils → sparktts/utils}/token_parser.py RENAMED
File without changes