j committed on
Commit
564c686
·
1 Parent(s): 13e5909

initial commit

Browse files
Files changed (3) hide show
  1. README.md +2 -1
  2. app.py +113 -0
  3. requirements.txt +16 -0
README.md CHANGED
@@ -1,8 +1,9 @@
1
  ---
2
- title: Audiosr
3
  emoji: 🏢
4
  colorFrom: yellow
5
  colorTo: yellow
 
6
  sdk: gradio
7
  sdk_version: 4.16.0
8
  app_file: app.py
 
1
  ---
2
+ title: Versatile Audio Super-resolution HARP plugin
3
  emoji: 🏢
4
  colorFrom: yellow
5
  colorTo: yellow
6
+ python_version: 3.9
7
  sdk: gradio
8
  sdk_version: 4.16.0
9
  app_file: app.py
app.py ADDED
@@ -0,0 +1,113 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/python3
2
+ import os
3
+ import torch
4
+ from audiosr import super_resolution, build_model, save_wave, get_time, read_list
5
+ from pyharp import ModelCard, build_endpoint
6
+ from audiotools import AudioSignal
7
+ import scipy
8
+ import torch
9
+ import gradio as gr
10
+
11
# Metadata describing this model; handed to build_endpoint() when the
# Gradio interface is assembled further down in this file.
card = ModelCard(
    name="Versatile Audio Super Resolution",
    description="Upsample audio and predict upper spectrum.",
    author="Team Audio",
    tags=[
        "AudioSR",
        "Diffusion",
        "Super Resolution",
        "Upsampling",
        "Sample Rate Conversion",
    ],
)

# Process-wide runtime settings, applied before the model is built.
os.environ["TOKENIZERS_PARALLELISM"] = "true"
torch.set_float32_matmul_precision("high")

# Forwarded unchanged to super_resolution() by process_fn.
# NOTE(review): the original author flagged this value as uncertain - confirm.
latent_t_per_second = 12.8

# Built once at import time and reused by every call to process_fn.
audiosr = build_model(model_name="basic", device="auto")
23
+
24
def process_fn(input_audio_path, seed, guidance_scale, num_inference_steps):
    """
    Run AudioSR super-resolution over an audio file in 10-second segments.

    The input is cut into consecutive segments of at most 10 seconds, each
    segment is passed through `super_resolution`, and the results are joined
    end to end and written to a single WAV file.

    Args:
        input_audio_path (str): the audio filepath to be processed.
        seed (int): forwarded to `super_resolution` as `seed`.
        guidance_scale (float): forwarded to `super_resolution` as
            `guidance_scale`.
        num_inference_steps (int): forwarded to `super_resolution` as
            `ddim_steps`.

        NOTE: The arguments after the path correspond to, and match the order
        of, the UI elements defined below.

    Returns:
        output_audio_path (str): the filepath of the processed audio.

    Raises:
        ValueError: if the input file contains no audio samples.
    """
    # Function-scope imports keep this block self-contained: the module-level
    # `import scipy` does not guarantee `scipy.io.wavfile` is loaded, and
    # `scipy.concatenate` (a deprecated NumPy alias) is gone from recent SciPy.
    import math
    import tempfile

    import numpy as np
    from scipy.io import wavfile

    segment_seconds = 10
    # NOTE(review): AudioSR is documented as producing 48 kHz audio regardless
    # of the input rate; writing at the input rate would change pitch/speed.
    # Confirm against the installed audiosr version.
    output_sample_rate = 48000

    sig = AudioSignal(input_audio_path)
    outfile = "./output.wav"

    total_samples = int(sig.signal_length)
    if total_samples <= 0:
        raise ValueError(f"No audio samples found in {input_audio_path!r}")

    segment_samples = int(segment_seconds * sig.sample_rate)
    # Round up so a clip shorter than one segment, and the tail of a longer
    # clip, are still processed instead of being silently dropped.
    num_segs = math.ceil(total_samples / segment_samples)

    pieces = []
    # A private scratch directory avoids clobbering a shared "temp.wav" when
    # several requests run at once, and is removed automatically afterwards.
    with tempfile.TemporaryDirectory() as workdir:
        segment_path = os.path.join(workdir, "segment.wav")

        for index in range(num_segs):
            start = index * segment_samples
            end = min(start + segment_samples, total_samples)

            # Indexing an AudioSignal directly selects batch items, not time,
            # so slice the sample (last) axis of the underlying tensor.
            # assumes audio_data is (batch, channels, samples) - TODO confirm
            segment = AudioSignal(sig.audio_data[..., start:end], sig.sample_rate)
            segment.write(segment_path)

            audio = super_resolution(
                audiosr,
                segment_path,
                seed=int(seed),
                guidance_scale=guidance_scale,
                ddim_steps=int(num_inference_steps),
                latent_t_per_second=latent_t_per_second,
            )

            # Accept either a NumPy array or a torch tensor from the model.
            if hasattr(audio, "detach"):
                audio = audio.detach().cpu().numpy()
            # assumes a single mono waveform per call (e.g. shape (1, 1, N));
            # flattening would interleave channels otherwise - TODO confirm
            samples = np.asarray(audio, dtype=np.float32).reshape(-1)

            # Drop any padding the model may have appended so segments join
            # without gaps; a no-op when the output already has this length.
            expected = int(round((end - start) * output_sample_rate / sig.sample_rate))
            pieces.append(samples[:expected])

    # Join along the sample axis so the segments play back to back.
    audio_concat = np.concatenate(pieces)

    wavfile.write(outfile, rate=output_sample_rate, data=audio_concat)
    return outfile
78
+
79
+ # Build the endpoint
80
with gr.Blocks() as webapp:
    # Input widgets. Their order must match process_fn's parameters:
    # (input_audio_path, seed, guidance_scale, num_inference_steps).
    inputs = [
        gr.Audio(
            label="Audio Input",
            type="filepath",
        ),
        # Numeric bounds (not strings) so the slider yields a number that can
        # be used directly as a seed.
        gr.Slider(
            label="seed",
            minimum=0,
            maximum=65535,
            value=0,
            step=1,
        ),
        gr.Slider(
            minimum=0,
            maximum=10,
            value=3.5,
            label="Guidance Scale",
        ),
        gr.Slider(
            minimum=1,
            maximum=500,
            step=1,
            value=50,
            label="Inference Steps",
        ),
    ]

    # Output widget that receives the filepath returned by process_fn.
    output = gr.Audio(label="Audio Output", type="filepath")

    # Wire the widgets, processing function and model card into an endpoint.
    ctrls_data, ctrls_button, process_button, cancel_button = build_endpoint(
        inputs, output, process_fn, card
    )

# webapp.queue()
webapp.launch(share=True)
requirements.txt ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ --extra-index-url https://download.pytorch.org/whl/cu118
2
+ git+https://github.com/huggingface/diffusers.git
3
+ git+https://github.com/huggingface/transformers.git
4
+ torch==2.0.1+cu118; sys_platform != 'darwin'
5
+ torch==2.0.1; sys_platform == 'darwin'
6
+ torchvision==0.15.2+cu118; sys_platform != 'darwin'
7
+ torchvision==0.15.2; sys_platform == 'darwin'
8
+ torchaudio==2.0.2+cu118; sys_platform != 'darwin'
9
+ torchaudio==2.0.2; sys_platform == 'darwin'
10
+ huggingface_hub
11
+ transformers==4.30.2
12
+ -e git+https://github.com/audacitorch/pyharp.git#egg=pyharp
13
+ descript-audiotools
14
+ scipy
15
+ datetime
16
+ gradio