adamo1139 committed on
Commit
e91ec4d
1 Parent(s): 6ba7188

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +8 -4
README.md CHANGED
@@ -34,10 +34,10 @@ import tempfile
34
  import os
35
  import numpy as np
36
 
37
- # Initialize the Spirit LM Base model. If you want to use expressive model, simply change the value from spirit-lm-base-7b to spirit-lm-expressive-7b
38
  spirit_lm = Spiritlm("spirit-lm-base-7b")
39
 
40
- def generate_output(input_type, input_content_text, input_content_audio, output_modality, temperature, top_p, max_new_tokens, do_sample):
41
  generation_config = GenerationConfig(
42
  temperature=temperature,
43
  top_p=top_p,
@@ -58,6 +58,7 @@ def generate_output(input_type, input_content_text, input_content_audio, output_
58
  interleaved_inputs=interleaved_inputs,
59
  output_modality=OutputModality[output_modality.upper()],
60
  generation_config=generation_config,
 
61
  )
62
 
63
  text_output = ""
@@ -94,23 +95,26 @@ def generate_output(input_type, input_content_text, input_content_audio, output_
94
  iface = gr.Interface(
95
  fn=generate_output,
96
  inputs=[
97
- gr.Radio(["text", "audio"], label="Input Type"),
98
  gr.Textbox(label="Input Content (Text)"),
99
  gr.Audio(label="Input Content (Audio)", type="filepath"),
100
- gr.Radio(["TEXT", "SPEECH", "ARBITRARY"], label="Output Modality"),
101
  gr.Slider(0, 1, step=0.1, value=0.9, label="Temperature"),
102
  gr.Slider(0, 1, step=0.05, value=0.95, label="Top P"),
103
  gr.Slider(1, 800, step=1, value=500, label="Max New Tokens"),
104
  gr.Checkbox(value=True, label="Do Sample"),
 
105
  ],
106
  outputs=[gr.Textbox(label="Generated Text"), gr.Audio(label="Generated Audio")],
107
  title="Spirit LM WebUI Demo",
108
  description="Demo for generating text or audio using the Spirit LM model.",
 
109
  )
110
 
111
  # Launch the interface
112
  iface.launch()
113
 
 
114
  ```
115
 
116
 
 
34
  import os
35
  import numpy as np
36
 
37
+ # Initialize the Spirit LM model with the modified class
38
  spirit_lm = Spiritlm("spirit-lm-base-7b")
39
 
40
+ def generate_output(input_type, input_content_text, input_content_audio, output_modality, temperature, top_p, max_new_tokens, do_sample, speaker_id):
41
  generation_config = GenerationConfig(
42
  temperature=temperature,
43
  top_p=top_p,
 
58
  interleaved_inputs=interleaved_inputs,
59
  output_modality=OutputModality[output_modality.upper()],
60
  generation_config=generation_config,
61
+ speaker_id=speaker_id, # Pass the selected speaker ID
62
  )
63
 
64
  text_output = ""
 
95
  iface = gr.Interface(
96
  fn=generate_output,
97
  inputs=[
98
+ gr.Radio(["text", "audio"], label="Input Type", value="text"),
99
  gr.Textbox(label="Input Content (Text)"),
100
  gr.Audio(label="Input Content (Audio)", type="filepath"),
101
+ gr.Radio(["TEXT", "SPEECH", "ARBITRARY"], label="Output Modality", value="SPEECH"),
102
  gr.Slider(0, 1, step=0.1, value=0.9, label="Temperature"),
103
  gr.Slider(0, 1, step=0.05, value=0.95, label="Top P"),
104
  gr.Slider(1, 800, step=1, value=500, label="Max New Tokens"),
105
  gr.Checkbox(value=True, label="Do Sample"),
106
+ gr.Dropdown(choices=[0, 1, 2, 3], value=0, label="Speaker ID"),
107
  ],
108
  outputs=[gr.Textbox(label="Generated Text"), gr.Audio(label="Generated Audio")],
109
  title="Spirit LM WebUI Demo",
110
  description="Demo for generating text or audio using the Spirit LM model.",
111
+ flagging_mode="never",
112
  )
113
 
114
  # Launch the interface
115
  iface.launch()
116
 
117
+
118
  ```
119
 
120