sanchit-gandhi committed on
Commit
5c99329
·
verified ·
1 Parent(s): b52d0fa

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +10 -16
app.py CHANGED
@@ -1,3 +1,4 @@
 
1
  import torch
2
  from threading import Thread
3
 
@@ -43,35 +44,28 @@ Bark can generate highly realistic, multilingual speech as well as other audio -
43
  In this demo, we leverage charactr's Vocos model to create high quality audio from Bark. \
44
  """
45
 
46
- # import model
47
- if device == "cpu":
48
- bark = BarkModel.from_pretrained(HUB_PATH)
49
- else:
50
- bark = BarkModel.from_pretrained(HUB_PATH).to(device)
51
- bark = bark.to_bettertransformer()
52
 
53
 
54
- # Inference
55
- def generate_audio(text, voice_preset = None, lag = 0):
 
56
  if voice_preset not in speaker_embeddings:
57
  voice_preset = None
58
-
59
- sentences = [
60
- text,
61
- ]
62
- inputs = processor(sentences, voice_preset=voice_preset).to(device)
63
  # Run the generation in a separate thread, so that we can fetch the generated text in a non-blocking way.
64
 
65
  fine_output = bark.generate(
66
  **inputs, coarse_temperature = 0.8, temperature = 0.5, do_sample=True
67
  )
68
-
69
  print("Fine tokens generated")
70
-
71
  with torch.no_grad():
72
  features = vocos.codes_to_features(fine_output.transpose(0,1))
73
  vocos_waveform = vocos.decode(features, bandwidth_id=torch.tensor([2], device=device))
74
-
75
  return (SAMPLE_RATE, vocos_waveform.cpu().squeeze().numpy())
76
 
77
 
 
1
+ import spaces
2
  import torch
3
  from threading import Thread
4
 
 
44
  In this demo, we leverage charactr's Vocos model to create high quality audio from Bark. \
45
  """
46
 
47
+ bark = BarkModel.from_pretrained(HUB_PATH).to(device)
48
+ bark = bark.to_bettertransformer()
 
 
 
 
49
 
50
 
51
+ # Inference on Zero GPU
52
+ @spaces.GPU
53
+ def generate_audio(text, voice_preset=None, lag=0):
54
  if voice_preset not in speaker_embeddings:
55
  voice_preset = None
56
+
57
+ inputs = processor([text], voice_preset=voice_preset).to(device)
 
 
 
58
  # Run the generation in a separate thread, so that we can fetch the generated text in a non-blocking way.
59
 
60
  fine_output = bark.generate(
61
  **inputs, coarse_temperature = 0.8, temperature = 0.5, do_sample=True
62
  )
 
63
  print("Fine tokens generated")
64
+
65
  with torch.no_grad():
66
  features = vocos.codes_to_features(fine_output.transpose(0,1))
67
  vocos_waveform = vocos.decode(features, bandwidth_id=torch.tensor([2], device=device))
68
+
69
  return (SAMPLE_RATE, vocos_waveform.cpu().squeeze().numpy())
70
 
71