hayaton0005 committed on
Commit
e55e137
·
verified ·
1 Parent(s): 824b042

Upload 4 files

Browse files
Files changed (5) hide show
  1. .gitattributes +1 -0
  2. app.py +40 -11
  3. app.txt +1 -0
  4. requirements.txt +2 -0
  5. soundfont.sf2 +3 -0
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ soundfont.sf2 filter=lfs diff=lfs merge=lfs -text
app.py CHANGED
@@ -1,17 +1,46 @@
1
  import gradio as gr
 
2
  from infer import infer_midi_from_wav
 
3
 
4
- def transcribe(wav_path):
5
- midi_path = infer_midi_from_wav(wav_path)
6
- return midi_path
7
 
8
- demo = gr.Interface(
9
- fn=transcribe,
10
- inputs=gr.Audio(type="filepath"), # ✅ 修正ポイント
11
- outputs=gr.File(label="Download MIDI"),
12
- title="Humming to MIDI",
13
- description="Upload your humming (WAV) to get MIDI output"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
14
  )
15
 
16
- if __name__ == "__main__":
17
- demo.launch()
 
1
  import gradio as gr
2
+ import os
3
  from infer import infer_midi_from_wav
4
+ import subprocess
5
 
6
# Directory containing this file; bundled assets (the SoundFont) live next to it.
BASE_DIR = os.path.dirname(os.path.abspath(__file__))


def _remove_quietly(path):
    """Delete *path*, ignoring the error if it is already gone or not removable."""
    try:
        os.remove(path)
    except OSError:
        pass


# MIDI -> WAV conversion (rendered with the fluidsynth command-line tool).
def convert_midi_to_wav(midi_path):
    """Render a MIDI file to a WAV file with fluidsynth and return the WAV path.

    Args:
        midi_path: Path to the MIDI file to render.

    Returns:
        Path to a newly created WAV file (44100 Hz). Each call writes to its
        own temporary file so simultaneous requests never overwrite one
        another's output.

    Raises:
        FileNotFoundError: If the MIDI file or the bundled ``soundfont.sf2``
            does not exist.
        RuntimeError: If fluidsynth cannot be started or exits with a
            non-zero status.
    """
    # Function-scope import so this block does not depend on edits to the
    # file's top-level import section.
    import tempfile

    soundfont_path = os.path.join(BASE_DIR, "soundfont.sf2")

    # Validate inputs up front so a missing file is reported as an error
    # here instead of depending on how fluidsynth reacts to it.
    if not os.path.isfile(midi_path):
        raise FileNotFoundError(f"MIDI file not found: {midi_path}")
    if not os.path.isfile(soundfont_path):
        raise FileNotFoundError(f"SoundFont file not found: {soundfont_path}")

    # Unique output file per call (the previous fixed "synth_output.wav" was
    # shared by every concurrent request).
    fd, wav_path = tempfile.mkstemp(prefix="synth_output_", suffix=".wav")
    os.close(fd)

    # Documented usage is: fluidsynth [options] [soundfonts] [midifiles],
    # so every option is placed before the file arguments.
    command = [
        "fluidsynth",
        "-ni",
        "-F",
        wav_path,
        "-r",
        "44100",
        soundfont_path,
        midi_path,
    ]

    try:
        result = subprocess.run(
            command, stdout=subprocess.PIPE, stderr=subprocess.PIPE
        )
    except OSError as exc:
        # Typically: the fluidsynth executable is not installed / not on PATH.
        _remove_quietly(wav_path)
        raise RuntimeError("fluidsynth conversion failed:\n" + str(exc)) from exc

    if result.returncode != 0:
        _remove_quietly(wav_path)
        # errors="replace" keeps the diagnostic readable even when the tool
        # emits bytes that are not valid UTF-8.
        raise RuntimeError(
            "fluidsynth conversion failed:\n"
            + result.stderr.decode(errors="replace")
        )
    return wav_path
27
+
28
# Inference + playback callback invoked by the Gradio interface.
def transcribe_and_play(wav_file):
    """Transcribe a recording to MIDI and render that MIDI back to audio.

    Args:
        wav_file: Path to the recorded audio file, as passed in by the Gradio
            audio input.

    Returns:
        A ``(wav_output_path, midi_path)`` tuple: the synthesized WAV file for
        playback and the MIDI file for download.

    Raises:
        ValueError: If no recording was supplied (``None`` or an empty path).
    """
    # Submitting without recording passes an empty value; fail with a clear
    # message instead of an obscure error deep inside the inference code.
    if not wav_file:
        raise ValueError("No audio was provided; record audio before submitting.")

    midi_path = infer_midi_from_wav(wav_file)
    wav_output_path = convert_midi_to_wav(midi_path)
    return wav_output_path, midi_path
33
+
34
# Gradio UI
#
# The microphone input is built separately because the keyword differs across
# Gradio major versions: 3.x takes `source="microphone"`, while 4.x removed it
# in favour of `sources=[...]` and rejects the old keyword with a TypeError.
# The old spelling is tried first because 3.x components accept arbitrary
# extra keywords, so passing `sources` there would not raise and would leave
# the component on its default source.
_RECORD_LABEL = "\u97f3\u58f0\u9332\u97f3"
try:
    _audio_input = gr.Audio(
        source="microphone", type="filepath", label=_RECORD_LABEL
    )
except TypeError:
    _audio_input = gr.Audio(
        sources=["microphone"], type="filepath", label=_RECORD_LABEL
    )

interface = gr.Interface(
    fn=transcribe_and_play,
    inputs=_audio_input,
    outputs=[
        gr.Audio(label="\u30d4\u30a2\u30ce\u97f3\u3067\u518d\u751f"),
        gr.File(label="MIDI\u30c0\u30a6\u30f3\u30ed\u30fc\u30c9"),
    ],
    title="\u9f3b\u6b4c\u304b\u3089\u306eMIDI\u5909\u63db\u30c7\u30e2",
    description="\u9332\u97f3\u3057\u305f\u97f3\u58f0\u3092MIDI\u306b\u5909\u63db\u3057\u3001\u30d4\u30a2\u30ce\u97f3\u3067\u518d\u751f\u3057\u307e\u3059",
)

# Only start the server when run as a script, so importing this module
# (e.g. from tests or tooling) does not launch a web server.
if __name__ == "__main__":
    interface.launch()
 
app.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ fluidsynth
requirements.txt CHANGED
@@ -17,6 +17,8 @@ tqdm
17
  librosa
18
  soundfile
19
  gradio
 
 
20
 
21
 
22
  # MIDI・音楽処理
 
17
  librosa
18
  soundfile
19
  gradio
20
+ fluidsynth
21
+
22
 
23
 
24
  # MIDI・音楽処理
soundfont.sf2 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:74594e8f4250680adf590507a306655a299935343583256f3b722c48a1bc1cb0
3
+ size 148398306