Spaces:
Sleeping
Sleeping
Commit
·
dae5b5d
1
Parent(s):
9551e0c
Update app.py
Browse files
app.py
CHANGED
@@ -1,137 +1,98 @@
|
|
1 |
-
import os
|
2 |
-
import torch
|
3 |
-
import librosa
|
4 |
-
import binascii
|
5 |
-
import warnings
|
6 |
-
import midi2audio # MIDI νμΌμ WAV νμΌλ‘
|
7 |
-
import numpy as np
|
8 |
-
import pytube as pt # YouTube
|
9 |
-
import gradio as gr
|
10 |
-
import soundfile as sf
|
11 |
-
from transformers import Pop2PianoForConditionalGeneration, Pop2PianoProcessor
|
12 |
|
13 |
-
|
14 |
-
|
15 |
-
|
16 |
-
os.makedirs(
|
|
|
17 |
|
18 |
-
|
19 |
-
|
20 |
-
|
21 |
-
|
|
|
22 |
|
|
|
23 |
def get_audio_from_yt_video(yt_link):
    """Download the first audio-only stream of a YouTube video.

    Args:
        yt_link: URL of the YouTube video, as typed into the link textbox.

    Returns:
        A ``(filename, filename)`` pair: the same path twice, because the
        click handler wired to this function feeds two output components.
        Both entries are ``None`` when the download fails.
    """
    # Random hex name so that separate requests do not overwrite each other.
    filename = os.path.join(yt_video_dir, binascii.hexlify(os.urandom(8)).decode() + ".mp4")
    try:
        yt = pt.YouTube(yt_link)
        audio_streams = yt.streams.filter(only_audio=True)
        audio_streams[0].download(filename=filename)
    except Exception as exc:
        # `Exception` (not a bare `except:`) lets KeyboardInterrupt/SystemExit
        # propagate; the cause is appended so failures other than a missing
        # video (network errors, no audio stream) can be diagnosed.
        warnings.warn(f"Video Not Found at {yt_link} ({exc!r})")
        filename = None

    return filename, filename
|
34 |
-
|
35 |
-
def inference(file_uploaded, composer):
    """Generate a piano cover for an audio file with the Pop2Piano model.

    Args:
        file_uploaded: Path of the audio file to convert.
        composer: Composer (arranger) name forwarded to ``model.generate``.

    Returns:
        Whatever ``prepare_output_file`` returns for the decoded MIDI objects.

    Raises:
        gr.Error: If no audio file was provided.
    """
    if not file_uploaded:
        # Without this guard librosa fails with an obscure error when the
        # button is pressed before any audio has been supplied.
        raise gr.Error("먼저 오디오 파일을 업로드하거나 유튜브 링크에서 오디오를 받아 주세요.")

    # sr=None keeps the file's native sampling rate instead of resampling.
    waveform, sr = librosa.load(file_uploaded, sr=None)

    # Feature extraction, moved to the same device as the model.
    inputs = processor(audio=waveform, sampling_rate=sr, return_tensors="pt").to(device)
    model_output = model.generate(input_features=inputs["input_features"], composer=composer)

    # Decoding is done on CPU copies of both the tokens and the extractor output.
    tokenizer_output = processor.batch_decode(
        token_ids=model_output.to("cpu"),
        feature_extractor_output=inputs.to("cpu"),
    )["pretty_midi_objects"]

    return prepare_output_file(tokenizer_output, sr)
|
43 |
|
44 |
-
|
45 |
-
|
46 |
-
|
47 |
-
|
48 |
-
|
49 |
-
|
50 |
-
wav_output = midi_output.replace(".mid", ".wav") # WAV μΆλ ₯ νμΌ κ²½λ‘
|
51 |
-
midi2audio.FluidSynth().midi_to_audio(midi_output, wav_output) # MIDIλ₯Ό WAVλ‘ λ³ν
|
52 |
-
|
53 |
-
return wav_output, wav_output, midi_output # WAV λ° MIDI νμΌ κ²½λ‘ λ°ν
|
54 |
|
55 |
-
|
56 |
-
pop_y, sr = librosa.load(pop_path, sr=None) # ν μμ
νμΌ λ‘λ
|
57 |
-
midi_y, _ = librosa.load(midi.name, sr=None) # MIDI νμΌ λ‘λ
|
58 |
|
59 |
-
|
60 |
-
|
61 |
-
|
62 |
-
|
63 |
-
|
64 |
-
|
65 |
-
|
66 |
-
|
67 |
-
|
68 |
-
return stereo_mix_path, stereo_mix_path # μ€ν
λ μ€ λ―Ήμ€ νμΌ κ²½λ‘ λ°ν
|
69 |
|
70 |
-
|
|
|
71 |
|
72 |
with block:
|
73 |
gr.HTML(
|
74 |
"""
|
75 |
<div style="text-align: center; max-width: 800px; margin: 0 auto;">
|
76 |
-
<
|
77 |
-
|
78 |
-
|
79 |
-
align-items: center;
|
80 |
-
gap: 0.8rem;
|
81 |
-
font-size: 1.75rem;
|
82 |
-
"
|
83 |
-
>
|
84 |
-
<h1 style="font-weight: 900; margin-bottom: 12px;">
|
85 |
-
πΉ Pop2Piano : νΌμλ
Έ 컀λ²κ³‘ μμ±κΈ° πΉ
|
86 |
-
</h1>
|
87 |
-
</div>
|
88 |
<p style="margin-bottom: 12px; font-size: 90%">
|
89 |
-
|
90 |
-
|
91 |
</p>
|
92 |
</div>
|
93 |
"""
|
94 |
)
|
95 |
with gr.Group():
|
96 |
-
with gr.Row(
|
97 |
with gr.Column():
|
98 |
file_uploaded = gr.Audio(label="μ€λμ€ μ
λ‘λ", type="filepath")
|
99 |
with gr.Column():
|
100 |
with gr.Row():
|
101 |
yt_link = gr.Textbox(label="μ νλΈ λ§ν¬λ₯Ό μ
λ ₯νμΈμ.", autofocus=True, lines=3)
|
102 |
yt_btn = gr.Button("μ νλΈ λ§ν¬μμ μ€λμ€λ₯Ό λ€μ΄ λ°μ΅λλ€.", size="lg")
|
103 |
-
|
104 |
-
|
105 |
-
yt_btn.click(get_audio_from_yt_video, inputs=[yt_link], outputs=[yt_audio_path, file_uploaded])
|
106 |
|
107 |
with gr.Group():
|
108 |
with gr.Column():
|
109 |
-
composer = gr.Dropdown(label="
|
110 |
generate_btn = gr.Button("λλ§μ νΌμλ
Έ 컀λ²κ³‘ λ§λ€κΈ°πΉπ΅")
|
111 |
-
|
112 |
-
|
113 |
-
|
114 |
-
with gr.Row().style(mobile_collapse=False, equal_height=True):
|
115 |
wav_output2 = gr.File(label="λλ§μ νΌμλ
Έ 컀λ²κ³‘μ λ€μ΄λ‘λ (.wav)")
|
116 |
wav_output1 = gr.Audio(label="λλ§μ νΌμλ
Έ 컀λ²κ³‘ λ£κΈ°")
|
117 |
-
midi_output = gr.File(label="μμ±ν
|
118 |
-
generate_btn.click(
|
119 |
-
|
120 |
-
|
121 |
-
|
122 |
-
|
123 |
-
|
124 |
-
|
125 |
-
|
126 |
-
gr.HTML(
|
127 |
-
"""
|
128 |
-
<div class="footer">
|
129 |
-
<center><p><a href="http://sweetcocoa.github.io/pop2piano_samples" style="text-decoration: underline;" target="_blank">Project Page</a>
|
130 |
-
<center><a href="https://huggingface.co/docs/transformers/main/model_doc/pop2piano" style="text-decoration: underline;" target="_blank">HuggingFace Model Docs</a>
|
131 |
-
<center><a href="https://github.com/sweetcocoa/pop2piano" style="text-decoration: underline;" target="_blank">Github</a>
|
132 |
-
</p>
|
133 |
-
</div>
|
134 |
-
"""
|
135 |
-
)
|
136 |
|
137 |
-
block.launch(debug=False)
|
|
|
1 |
+
import os
|
2 |
+
import torch
|
3 |
+
import librosa
|
4 |
+
import binascii
|
5 |
+
import warnings
|
6 |
+
import midi2audio  # Convert MIDI files to WAV files
|
7 |
+
import numpy as np
|
8 |
+
import pytube as pt  # Download YouTube videos as audio
|
9 |
+
import gradio as gr
|
10 |
+
import soundfile as sf
|
11 |
+
from transformers import Pop2PianoForConditionalGeneration, Pop2PianoProcessor
|
12 |
|
13 |
+
# Working directories: one for audio fetched from YouTube links, one for
# the generated MIDI/WAV files. Both are created at import time.
yt_video_dir = "./yt_dir"
outputs_dir = "./midi_wav_outputs"
os.makedirs(outputs_dir, exist_ok=True)
os.makedirs(yt_video_dir, exist_ok=True)

# Model setup: use the GPU when torch reports one, otherwise the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

processor = Pop2PianoProcessor.from_pretrained("sweetcocoa/pop2piano")
model = Pop2PianoForConditionalGeneration.from_pretrained("sweetcocoa/pop2piano")
model = model.to(device)

# Keys of the generation config's composer -> feature-token mapping;
# these are the values offered in the composer dropdown.
composers = model.generation_config.composer_to_feature_token.keys()
|
24 |
|
25 |
+
def get_audio_from_yt_video(yt_link):
    """Download the first audio-only stream of a YouTube video.

    Args:
        yt_link: URL of the YouTube video, as typed into the link textbox.

    Returns:
        A ``(filename, filename)`` pair: the same path twice, because the
        click handler wired to this function feeds two output components
        (the extracted-audio preview and the upload widget). Both entries
        are ``None`` when the download fails.
    """
    # Random hex name so that separate requests do not overwrite each other.
    filename = os.path.join(yt_video_dir, binascii.hexlify(os.urandom(8)).decode() + ".mp4")
    try:
        yt = pt.YouTube(yt_link)
        audio_streams = yt.streams.filter(only_audio=True)
        audio_streams[0].download(filename=filename)
    except Exception as exc:
        # `Exception` (not a bare `except:`) lets KeyboardInterrupt/SystemExit
        # propagate; the cause is appended so failures other than a missing
        # video (network errors, no audio stream) can be diagnosed.
        warnings.warn(f"Video Not Found at {yt_link} ({exc!r})")
        filename = None

    return filename, filename
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
37 |
|
38 |
+
def inference(file_uploaded, composer):
    """Generate a piano cover for an audio file with the Pop2Piano model.

    Args:
        file_uploaded: Path of the audio file to convert.
        composer: Composer (arranger) name forwarded to ``model.generate``.

    Returns:
        Whatever ``prepare_output_file`` returns for the decoded MIDI objects.

    Raises:
        gr.Error: If no audio file was provided.
    """
    if not file_uploaded:
        # Without this guard librosa fails with an obscure error when the
        # button is pressed before any audio has been supplied.
        raise gr.Error("먼저 오디오 파일을 업로드하거나 유튜브 링크에서 오디오를 받아 주세요.")

    # sr=None keeps the file's native sampling rate instead of resampling.
    waveform, sr = librosa.load(file_uploaded, sr=None)

    # Feature extraction, moved to the same device as the model.
    inputs = processor(audio=waveform, sampling_rate=sr, return_tensors="pt").to(device)
    model_output = model.generate(input_features=inputs["input_features"], composer=composer)

    # Decoding is done on CPU copies of both the tokens and the extractor output.
    tokenizer_output = processor.batch_decode(
        token_ids=model_output.to("cpu"),
        feature_extractor_output=inputs.to("cpu"),
    )["pretty_midi_objects"]

    return prepare_output_file(tokenizer_output, sr)
|
|
|
|
|
46 |
|
47 |
+
def prepare_output_file(tokenizer_output, sr):
    """Write the first decoded MIDI object to disk and render it to WAV.

    Args:
        tokenizer_output: Indexable collection of decoded MIDI objects; only
            element 0 is used, and it must provide ``write(path)``.
        sr: Sampling rate of the source audio. Not used here; kept so the
            existing call from ``inference`` keeps working.

    Returns:
        ``(wav_path, wav_path, midi_path)``. The WAV path appears twice
        because the caller feeds it to two output components.
    """
    # Random hex suffix so that separate requests get distinct output files.
    output_file_name = "output_" + binascii.hexlify(os.urandom(8)).decode()
    midi_output = os.path.join(outputs_dir, output_file_name + ".mid")
    tokenizer_output[0].write(midi_output)

    # Swap only the extension. str.replace(".mid", ".wav") would rewrite every
    # ".mid" occurrence in the path, including one inside a directory name.
    wav_output = os.path.splitext(midi_output)[0] + ".wav"
    midi2audio.FluidSynth().midi_to_audio(midi_output, wav_output)

    return wav_output, wav_output, midi_output
|
|
|
56 |
|
57 |
+
# Gradio UI.
# NOTE(review): the nesting below is reconstructed from the order of the
# `with` statements because the original indentation was lost; confirm the
# layout against the running app.
# The Korean UI strings are restored from text that had been decoded with the
# wrong character set (which had also split some literals across lines).
block = gr.Blocks(theme="Taithrah/Minimal")

with block:
    # Page header: title plus a short usage hint.
    gr.HTML(
        """
        <div style="text-align: center; max-width: 800px; margin: 0 auto;">
            <h1 style="font-weight: 900; margin-bottom: 12px;">
                🎹 Pop2Piano : 피아노 커버곡 생성기 🎹
            </h1>
            <p style="margin-bottom: 12px; font-size: 90%">
                Pop2Piano 데모: 팝 오디오 기반 피아노 커버곡 생성. <br>
                작곡가(편곡자)를 선택하고 팝 오디오를 업로드하거나 유튜브 링크를 입력한 후 생성 버튼을 클릭하세요.
            </p>
        </div>
        """
    )

    # Input section: direct upload on one side, YouTube download on the other.
    with gr.Group():
        with gr.Row():
            with gr.Column():
                file_uploaded = gr.Audio(label="오디오 업로드", type="filepath")
            with gr.Column():
                with gr.Row():
                    yt_link = gr.Textbox(label="유튜브 링크를 입력하세요.", autofocus=True, lines=3)
                    yt_btn = gr.Button("유튜브 링크에서 오디오를 다운 받습니다.", size="lg")
                yt_audio_path = gr.Audio(label="유튜브 동영상에서 추출한 오디오", interactive=False)
                # The downloaded file fills both the preview player and the
                # upload widget, so it can be sent straight to generation.
                yt_btn.click(
                    get_audio_from_yt_video,
                    inputs=[yt_link],
                    outputs=[yt_audio_path, file_uploaded],
                )

    # Generation section: composer choice, trigger button, and the results.
    with gr.Group():
        with gr.Column():
            # list(...) hands Gradio a plain list rather than a dict keys view.
            composer = gr.Dropdown(label="편곡자", choices=list(composers), value="composer1")
            generate_btn = gr.Button("나만의 피아노 커버곡 만들기🎹🎵")
        with gr.Row():
            wav_output2 = gr.File(label="나만의 피아노 커버곡을 다운로드 (.wav)")
            wav_output1 = gr.Audio(label="나만의 피아노 커버곡 듣기")
            midi_output = gr.File(label="생성한 midi 파일 다운로드 (.mid)")
        # inference returns (wav, wav, midi), matching this output order.
        generate_btn.click(
            inference,
            inputs=[file_uploaded, composer],
            outputs=[wav_output1, wav_output2, midi_output],
        )

block.launch(debug=False)
|