Update README.md
Browse files
README.md
CHANGED
@@ -34,10 +34,10 @@ import tempfile
|
|
34 |
import os
|
35 |
import numpy as np
|
36 |
|
37 |
-
# Initialize the Spirit LM
|
38 |
spirit_lm = Spiritlm("spirit-lm-base-7b")
|
39 |
|
40 |
-
def generate_output(input_type, input_content_text, input_content_audio, output_modality, temperature, top_p, max_new_tokens, do_sample):
|
41 |
generation_config = GenerationConfig(
|
42 |
temperature=temperature,
|
43 |
top_p=top_p,
|
@@ -58,6 +58,7 @@ def generate_output(input_type, input_content_text, input_content_audio, output_
|
|
58 |
interleaved_inputs=interleaved_inputs,
|
59 |
output_modality=OutputModality[output_modality.upper()],
|
60 |
generation_config=generation_config,
|
|
|
61 |
)
|
62 |
|
63 |
text_output = ""
|
@@ -94,23 +95,26 @@ def generate_output(input_type, input_content_text, input_content_audio, output_
|
|
94 |
iface = gr.Interface(
|
95 |
fn=generate_output,
|
96 |
inputs=[
|
97 |
-
gr.Radio(["text", "audio"], label="Input Type"),
|
98 |
gr.Textbox(label="Input Content (Text)"),
|
99 |
gr.Audio(label="Input Content (Audio)", type="filepath"),
|
100 |
-
gr.Radio(["TEXT", "SPEECH", "ARBITRARY"], label="Output Modality"),
|
101 |
gr.Slider(0, 1, step=0.1, value=0.9, label="Temperature"),
|
102 |
gr.Slider(0, 1, step=0.05, value=0.95, label="Top P"),
|
103 |
gr.Slider(1, 800, step=1, value=500, label="Max New Tokens"),
|
104 |
gr.Checkbox(value=True, label="Do Sample"),
|
|
|
105 |
],
|
106 |
outputs=[gr.Textbox(label="Generated Text"), gr.Audio(label="Generated Audio")],
|
107 |
title="Spirit LM WebUI Demo",
|
108 |
description="Demo for generating text or audio using the Spirit LM model.",
|
|
|
109 |
)
|
110 |
|
111 |
# Launch the interface
|
112 |
iface.launch()
|
113 |
|
|
|
114 |
```
|
115 |
|
116 |
|
|
|
34 |
import os
|
35 |
import numpy as np
|
36 |
|
37 |
+
# Initialize the Spirit LM model with the modified class
|
38 |
spirit_lm = Spiritlm("spirit-lm-base-7b")
|
39 |
|
40 |
+
def generate_output(input_type, input_content_text, input_content_audio, output_modality, temperature, top_p, max_new_tokens, do_sample, speaker_id):
|
41 |
generation_config = GenerationConfig(
|
42 |
temperature=temperature,
|
43 |
top_p=top_p,
|
|
|
58 |
interleaved_inputs=interleaved_inputs,
|
59 |
output_modality=OutputModality[output_modality.upper()],
|
60 |
generation_config=generation_config,
|
61 |
+
speaker_id=speaker_id, # Pass the selected speaker ID
|
62 |
)
|
63 |
|
64 |
text_output = ""
|
|
|
95 |
iface = gr.Interface(
|
96 |
fn=generate_output,
|
97 |
inputs=[
|
98 |
+
gr.Radio(["text", "audio"], label="Input Type", value="text"),
|
99 |
gr.Textbox(label="Input Content (Text)"),
|
100 |
gr.Audio(label="Input Content (Audio)", type="filepath"),
|
101 |
+
gr.Radio(["TEXT", "SPEECH", "ARBITRARY"], label="Output Modality", value="SPEECH"),
|
102 |
gr.Slider(0, 1, step=0.1, value=0.9, label="Temperature"),
|
103 |
gr.Slider(0, 1, step=0.05, value=0.95, label="Top P"),
|
104 |
gr.Slider(1, 800, step=1, value=500, label="Max New Tokens"),
|
105 |
gr.Checkbox(value=True, label="Do Sample"),
|
106 |
+
gr.Dropdown(choices=[0, 1, 2, 3], value=0, label="Speaker ID"),
|
107 |
],
|
108 |
outputs=[gr.Textbox(label="Generated Text"), gr.Audio(label="Generated Audio")],
|
109 |
title="Spirit LM WebUI Demo",
|
110 |
description="Demo for generating text or audio using the Spirit LM model.",
|
111 |
+
flagging_mode="never",
|
112 |
)
|
113 |
|
114 |
# Launch the interface
|
115 |
iface.launch()
|
116 |
|
117 |
+
|
118 |
```
|
119 |
|
120 |
|