DataBassist committed on
Commit dae5b5d · 1 Parent(s): 9551e0c

Update app.py

Files changed (1)
  1. app.py +62 -101
app.py CHANGED
@@ -1,137 +1,98 @@
- import os  # module for file and directory operations
- import torch  # PyTorch deep learning framework
- import librosa  # module for audio processing
- import binascii  # module for handling binary data
- import warnings  # module for emitting warning messages
- import midi2audio  # module for converting MIDI files to WAV files
- import numpy as np  # module for handling multidimensional arrays
- import pytube as pt  # module for downloading YouTube videos
- import gradio as gr  # module for building interactive UIs
- import soundfile as sf  # module for handling sound files
- from transformers import Pop2PianoForConditionalGeneration, Pop2PianoProcessor  # Pop2Piano model and preprocessor

- yt_video_dir = "./yt_dir"  # download directory for YouTube videos
- outputs_dir = "./midi_wav_outputs"  # output directory for MIDI and WAV files
- os.makedirs(outputs_dir, exist_ok=True)  # create the output directory (ignored if it already exists)
- os.makedirs(yt_video_dir, exist_ok=True)  # create the YouTube download directory (ignored if it already exists)

- device = "cuda" if torch.cuda.is_available() else "cpu"  # use the GPU if CUDA is available, otherwise the CPU
- model = Pop2PianoForConditionalGeneration.from_pretrained("sweetcocoa/pop2piano").to(device)  # load the pretrained Pop2Piano model
- processor = Pop2PianoProcessor.from_pretrained("sweetcocoa/pop2piano")  # load the pretrained Pop2Piano preprocessor
- composers = model.generation_config.composer_to_feature_token.keys()  # get the list of composers

  def get_audio_from_yt_video(yt_link):
      try:
-         yt = pt.YouTube(yt_link)  # create the YouTube video object
-         t = yt.streams.filter(only_audio=True)  # filter for audio-only streams
-         filename = os.path.join(yt_video_dir, binascii.hexlify(os.urandom(8)).decode() + ".mp4")  # generate a random file name
-         t[0].download(filename=filename)  # download the video
      except:
-         warnings.warn(f"Video Not Found at {yt_link}")  # emit a warning message
          filename = None
-
      return filename, filename
-
- def inference(file_uploaded, composer):
-     waveform, sr = librosa.load(file_uploaded, sr=None)  # load the audio data and sampling rate from the file
-
-     inputs = processor(audio=waveform, sampling_rate=sr, return_tensors="pt").to(device)  # preprocess the input data
-     model_output = model.generate(input_features=inputs["input_features"], composer=composer)  # run the model and generate output
-     tokenizer_output = processor.batch_decode(token_ids=model_output.to("cpu"), feature_extractor_output=inputs.to("cpu"))["pretty_midi_objects"]  # decode the tokens
-
-     return prepare_output_file(tokenizer_output, sr)  # call the output-file preparation function

- def prepare_output_file(tokenizer_output, sr):
-     output_file_name = "output_" + binascii.hexlify(os.urandom(8)).decode()  # generate a random output file name
-     midi_output = os.path.join(outputs_dir, output_file_name + ".mid")  # MIDI output file path
-
-     tokenizer_output[0].write(midi_output)  # write the MIDI file
-
-     wav_output = midi_output.replace(".mid", ".wav")  # WAV output file path
-     midi2audio.FluidSynth().midi_to_audio(midi_output, wav_output)  # convert MIDI to WAV
-
-     return wav_output, wav_output, midi_output  # return the WAV and MIDI file paths

- def get_stereo(pop_path, midi, pop_scale=0.5):
-     pop_y, sr = librosa.load(pop_path, sr=None)  # load the pop audio file
-     midi_y, _ = librosa.load(midi.name, sr=None)  # load the rendered MIDI audio file

-     if len(pop_y) > len(midi_y):
-         midi_y = np.pad(midi_y, (0, len(pop_y) - len(midi_y)))  # pad the MIDI track to the pop track's length
-     elif len(pop_y) < len(midi_y):
-         pop_y = np.pad(pop_y, (0, -len(pop_y) + len(midi_y)))  # pad the pop track to the MIDI track's length
-     stereo = np.stack((midi_y, pop_y * pop_scale))  # build the stereo mix
-
-     stereo_mix_path = pop_path.replace("output", "output_stereo_mix")  # stereo mix file path
-     sf.write(file=stereo_mix_path, data=stereo.T, samplerate=sr, format="wav")  # write the stereo mix file
-
-     return stereo_mix_path, stereo_mix_path  # return the stereo mix file path

- block = gr.Blocks("Taithrah/Minimal")  # create the Gradio Blocks app

  with block:
      gr.HTML(
          """
          <div style="text-align: center; max-width: 800px; margin: 0 auto;">
-             <div
-                 style="
-                     display: inline-flex;
-                     align-items: center;
-                     gap: 0.8rem;
-                     font-size: 1.75rem;
-                 "
-             >
-                 <h1 style="font-weight: 900; margin-bottom: 12px;">
-                     🎹 Pop2Piano : ν”Όμ•„λ…Έ 컀버곑 생성기 🎹
-                 </h1>
-             </div>
              <p style="margin-bottom: 12px; font-size: 90%">
-                 A demo for Pop2Piano: Pop Audio-based Piano Cover Generation. <br>
-                 Please select the composer (Arranger) and upload the pop audio or enter the YouTube link and then click Generate.
              </p>
          </div>
          """
      )
      with gr.Group():
-         with gr.Row(equal_height=True):
              with gr.Column():
                  file_uploaded = gr.Audio(label="μ˜€λ””μ˜€ μ—…λ‘œλ“œ", type="filepath")
              with gr.Column():
                  with gr.Row():
                      yt_link = gr.Textbox(label="유튜브 링크λ₯Ό μž…λ ₯ν•˜μ„Έμš”.", autofocus=True, lines=3)
                      yt_btn = gr.Button("유튜브 λ§ν¬μ—μ„œ μ˜€λ””μ˜€λ₯Ό λ‹€μš΄ λ°›μŠ΅λ‹ˆλ‹€.", size="lg")
-
-                 yt_audio_path = gr.Audio(label="유튜브 λ™μ˜μƒμ—μ„œ μΆ”μΆœν•œ μ˜€λ””μ˜€", interactive=False)
-                 yt_btn.click(get_audio_from_yt_video, inputs=[yt_link], outputs=[yt_audio_path, file_uploaded])

      with gr.Group():
          with gr.Column():
-             composer = gr.Dropdown(label="Arranger", choices=composers, value="composer1")
              generate_btn = gr.Button("λ‚˜λ§Œμ˜ ν”Όμ•„λ…Έ 컀버곑 λ§Œλ“€κΈ°πŸŽΉπŸŽ΅")
-
-
-
-         with gr.Row().style(mobile_collapse=False, equal_height=True):
              wav_output2 = gr.File(label="λ‚˜λ§Œμ˜ ν”Όμ•„λ…Έ 컀버곑을 λ‹€μš΄λ‘œλ“œ (.wav)")
              wav_output1 = gr.Audio(label="λ‚˜λ§Œμ˜ ν”Όμ•„λ…Έ 컀버곑 λ“£κΈ°")
-             midi_output = gr.File(label="μƒμ„±ν•œ MIDI 파일 λ‹€μš΄λ‘œλ“œ (.mid)")
-         generate_btn.click(inference,
-                            inputs=[file_uploaded, composer],
-                            outputs=[wav_output1, wav_output2, midi_output])
-
-
-
-
-
-     gr.HTML(
-         """
-         <div class="footer">
-             <center><p><a href="http://sweetcocoa.github.io/pop2piano_samples" style="text-decoration: underline;" target="_blank">Project Page</a>
-             <center><a href="https://huggingface.co/docs/transformers/main/model_doc/pop2piano" style="text-decoration: underline;" target="_blank">HuggingFace Model Docs</a>
-             <center><a href="https://github.com/sweetcocoa/pop2piano" style="text-decoration: underline;" target="_blank">Github</a>
-         </p>
-         </div>
-         """
-     )

- block.launch(debug=False)
 
+ import os
+ import torch
+ import librosa
+ import binascii
+ import warnings
+ import midi2audio  # convert MIDI files to WAV files
+ import numpy as np
+ import pytube as pt  # download YouTube videos as audio
+ import gradio as gr
+ import soundfile as sf
+ from transformers import Pop2PianoForConditionalGeneration, Pop2PianoProcessor

+ # Create directories
+ yt_video_dir = "./yt_dir"  # YouTube video download path
+ outputs_dir = "./midi_wav_outputs"  # output file path
+ os.makedirs(outputs_dir, exist_ok=True)
+ os.makedirs(yt_video_dir, exist_ok=True)

+ # Model setup
+ device = "cuda" if torch.cuda.is_available() else "cpu"
+ model = Pop2PianoForConditionalGeneration.from_pretrained("sweetcocoa/pop2piano").to(device)
+ processor = Pop2PianoProcessor.from_pretrained("sweetcocoa/pop2piano")
+ composers = model.generation_config.composer_to_feature_token.keys()

+ # Function to extract audio from a YouTube video
  def get_audio_from_yt_video(yt_link):
      try:
+         yt = pt.YouTube(yt_link)
+         t = yt.streams.filter(only_audio=True)
+         filename = os.path.join(yt_video_dir, binascii.hexlify(os.urandom(8)).decode() + ".mp4")
+         t[0].download(filename=filename)
      except:
+         warnings.warn(f"Video Not Found at {yt_link}")
          filename = None
+
      return filename, filename

+ # Model inference function
+ def inference(file_uploaded, composer):
+     waveform, sr = librosa.load(file_uploaded, sr=None)
+     inputs = processor(audio=waveform, sampling_rate=sr, return_tensors="pt").to(device)
+     model_output = model.generate(input_features=inputs["input_features"], composer=composer)
+     tokenizer_output = processor.batch_decode(token_ids=model_output.to("cpu"), feature_extractor_output=inputs.to("cpu"))["pretty_midi_objects"]

+     return prepare_output_file(tokenizer_output, sr)

+ # Function to prepare the output files
+ def prepare_output_file(tokenizer_output, sr):
+     output_file_name = "output_" + binascii.hexlify(os.urandom(8)).decode()
+     midi_output = os.path.join(outputs_dir, output_file_name + ".mid")
+     tokenizer_output[0].write(midi_output)
+     wav_output = midi_output.replace(".mid", ".wav")
+     midi2audio.FluidSynth().midi_to_audio(midi_output, wav_output)
+
+     return wav_output, wav_output, midi_output

+ # Gradio UI setup
+ block = gr.Blocks(theme="Taithrah/Minimal")

  with block:
      gr.HTML(
          """
          <div style="text-align: center; max-width: 800px; margin: 0 auto;">
+             <h1 style="font-weight: 900; margin-bottom: 12px;">
+                 🎹 Pop2Piano : ν”Όμ•„λ…Έ 컀버곑 생성기 🎹
+             </h1>
              <p style="margin-bottom: 12px; font-size: 90%">
+                 Pop2Piano 데λͺ¨: 팝 μ˜€λ””μ˜€ 기반 ν”Όμ•„λ…Έ 컀버곑 생성. <br>
+                 μž‘κ³‘κ°€(편곑자)λ₯Ό μ„ νƒν•˜κ³  팝 μ˜€λ””μ˜€λ₯Ό μ—…λ‘œλ“œν•˜κ±°λ‚˜ 유튜브 링크λ₯Ό μž…λ ₯ν•œ ν›„ 생성 λ²„νŠΌμ„ ν΄λ¦­ν•˜μ„Έμš”.
              </p>
          </div>
          """
      )
      with gr.Group():
+         with gr.Row():
              with gr.Column():
                  file_uploaded = gr.Audio(label="μ˜€λ””μ˜€ μ—…λ‘œλ“œ", type="filepath")
              with gr.Column():
                  with gr.Row():
                      yt_link = gr.Textbox(label="유튜브 링크λ₯Ό μž…λ ₯ν•˜μ„Έμš”.", autofocus=True, lines=3)
                      yt_btn = gr.Button("유튜브 λ§ν¬μ—μ„œ μ˜€λ””μ˜€λ₯Ό λ‹€μš΄ λ°›μŠ΅λ‹ˆλ‹€.", size="lg")
+                 yt_audio_path = gr.Audio(label="유튜브 λ™μ˜μƒμ—μ„œ μΆ”μΆœν•œ μ˜€λ””μ˜€", interactive=False)
+                 yt_btn.click(get_audio_from_yt_video, inputs=[yt_link], outputs=[yt_audio_path, file_uploaded])

      with gr.Group():
          with gr.Column():
+             composer = gr.Dropdown(label="편곑자", choices=composers, value="composer1")
              generate_btn = gr.Button("λ‚˜λ§Œμ˜ ν”Όμ•„λ…Έ 컀버곑 λ§Œλ“€κΈ°πŸŽΉπŸŽ΅")
+         with gr.Row():
              wav_output2 = gr.File(label="λ‚˜λ§Œμ˜ ν”Όμ•„λ…Έ 컀버곑을 λ‹€μš΄λ‘œλ“œ (.wav)")
              wav_output1 = gr.Audio(label="λ‚˜λ§Œμ˜ ν”Όμ•„λ…Έ 컀버곑 λ“£κΈ°")
+             midi_output = gr.File(label="μƒμ„±ν•œ midi 파일 λ‹€μš΄λ‘œλ“œ (.mid)")
+         generate_btn.click(
+             inference,
+             inputs=[file_uploaded, composer],
+             outputs=[wav_output1, wav_output2, midi_output])

+ block.launch(debug=False)
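
For reference, the core pipeline that the updated app.py wires into Gradio can also be exercised on its own. The sketch below mirrors inference() and prepare_output_file() from the new version; the input file "song.wav", the composer token "composer1", and the output names are placeholders for this example, and a working FluidSynth installation is assumed for the MIDI-to-WAV step.

# Minimal standalone sketch of the Pop2Piano pipeline used by app.py (outside Gradio).
# "song.wav", "cover.mid", and "cover.wav" are hypothetical local paths, not part of the commit above.
import librosa
import midi2audio
import torch
from transformers import Pop2PianoForConditionalGeneration, Pop2PianoProcessor

device = "cuda" if torch.cuda.is_available() else "cpu"
model = Pop2PianoForConditionalGeneration.from_pretrained("sweetcocoa/pop2piano").to(device)
processor = Pop2PianoProcessor.from_pretrained("sweetcocoa/pop2piano")

# Load the pop audio and preprocess it exactly as inference() does.
waveform, sr = librosa.load("song.wav", sr=None)
inputs = processor(audio=waveform, sampling_rate=sr, return_tensors="pt").to(device)

# Generate token ids conditioned on a composer token, then decode to pretty_midi objects.
output = model.generate(input_features=inputs["input_features"], composer="composer1")
midi = processor.batch_decode(token_ids=output.to("cpu"),
                              feature_extractor_output=inputs.to("cpu"))["pretty_midi_objects"][0]

# Write the MIDI file and render it to WAV, as prepare_output_file() does.
midi.write("cover.mid")
midi2audio.FluidSynth().midi_to_audio("cover.mid", "cover.wav")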