Spaces:
Paused
Paused
Upload 3 files
Browse files
- app.py +95 -0
- requirements.txt +15 -0
- utils_violin_transcript.py +0 -0
app.py
ADDED
@@ -0,0 +1,95 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from utils_violin_transcript import PretrainedModel
|
2 |
+
from json import load as json_load
|
3 |
+
from huggingface_hub import hf_hub_download
|
4 |
+
from torch import device as Device
|
5 |
+
from torch.cuda import is_available as cuda_is_available
|
6 |
+
from transformers import Pop2PianoForConditionalGeneration, Pop2PianoProcessor
|
7 |
+
from librosa import load as librosa_load, piptrack, hz_to_midi
|
8 |
+
from mido import MidiFile, MidiTrack, Message, MetaMessage, bpm2tempo
|
9 |
+
from basic_pitch.inference import predict as basic_pitch_predict
|
10 |
+
from numpy import argmax as np_argmax, isnan as np_isnan
|
11 |
+
|
12 |
+
# Torch device shared by every model in this app: CUDA when available, else CPU.
device = Device("cuda" if cuda_is_available() else "cpu")
|
13 |
+
|
14 |
+
class Pop2Piano:
    """Wrapper around the Hugging Face Pop2Piano model and its processor.

    The model is loaded once at construction time and moved to ``device``;
    ``audio2midi`` then turns an audio file into a MIDI file on disk.
    """

    def __init__(self, device: Device, model_id_path="sweetcocoa/pop2piano"):
        """Load the model (onto *device*) and processor from *model_id_path*."""
        self.model = Pop2PianoForConditionalGeneration.from_pretrained(model_id_path).to(device)
        self.processor = Pop2PianoProcessor.from_pretrained(model_id_path)

    def audio2midi(self, input, composer, num_bars, num_beams, steps_per_beat):
        """Generate a piano MIDI file from an audio file.

        Args:
            input: Path of the audio file to load.
            composer: Composer number; it is appended to the string
                ``"composer"`` to form the value passed to ``generate``.
            num_bars: Forwarded to the processor as ``num_bars``.
            num_beams: Forwarded to ``generate`` as ``num_beams``.
            steps_per_beat: Forwarded to the processor.

        Returns:
            The path of the written MIDI file, always ``"output.mid"``.
        """
        # UI number widgets may deliver floats; "composer1.0" is not a usable
        # composer name and beam/bar counts must be integers.
        composer = int(composer)
        num_bars = int(num_bars)
        num_beams = int(num_beams)

        # sr=None keeps the file's native sampling rate.
        data, sr = librosa_load(input, sr=None)
        inputs = self.processor(data, sr, steps_per_beat, return_tensors="pt", num_bars=num_bars)

        # The model may live on the GPU while the processor returns CPU
        # tensors, so move the features to wherever the model is.
        input_features = inputs["input_features"].to(self.model.device)
        generated = self.model.generate(
            num_beams=num_beams,
            do_sample=True,
            input_features=input_features,
            composer="composer" + str(composer),
        )

        # Decode on the CPU, next to the (CPU) processor output.
        decoded = self.processor.batch_decode(generated.cpu(), inputs)
        midi = decoded["pretty_midi_objects"][0]
        # Write by path so no file handle is left open by this method.
        midi.write("output.mid")
        return "output.mid"
|
23 |
+
|
24 |
+
def smooth_pitch_sequence(pitches, magnitudes, threshold=0.1):
    """Pick one MIDI note (or ``None``) per frame from pitch-tracking output.

    For every column (frame) the row with the largest magnitude is selected.
    The frame becomes ``None`` when that magnitude is below *threshold*, when
    the selected pitch is NaN or not positive, or when the rounded MIDI note
    falls outside the valid 0-127 range.

    Args:
        pitches: 2-D array indexed ``[bin, frame]`` of frequencies in Hz.
        magnitudes: 2-D array of the same shape with the matching magnitudes.
        threshold: Minimum magnitude for a frame to count as voiced.

    Returns:
        A list with one entry per frame: an ``int`` MIDI note or ``None``.
    """
    n_bins, n_frames = pitches.shape
    if n_bins == 0:
        # argmax over an empty axis raises; no bins means nothing is voiced.
        return [None] * n_frames

    # One argmax for all frames instead of one call per frame.
    strongest_bins = np_argmax(magnitudes, axis=0)

    midi_sequence = []
    for frame, bin_index in enumerate(strongest_bins):
        pitch = pitches[bin_index, frame]
        if magnitudes[bin_index, frame] < threshold or np_isnan(pitch) or pitch <= 0:
            midi_sequence.append(None)
            continue
        midi_note = int(round(float(hz_to_midi(pitch))))
        # Notes outside 0-127 cannot be written to a MIDI file; treat them as
        # unvoiced rather than letting the MIDI writer fail later.
        midi_sequence.append(midi_note if 0 <= midi_note <= 127 else None)
    return midi_sequence
|
36 |
+
|
37 |
+
def clean_midi_sequence(sequence, min_note_length=2):
    """Silence notes that last fewer than *min_note_length* consecutive frames.

    The sequence is split into runs of equal values. A run of a real note that
    is at least *min_note_length* frames long is kept; every other run (short
    notes and runs of ``None``) is replaced by ``None`` entries.

    The result always has exactly as many entries as *sequence*, including
    trailing silence, so frame indices keep their meaning for the caller.

    Args:
        sequence: Iterable of per-frame MIDI notes (``int``) or ``None``.
        min_note_length: Minimum run length, in frames, for a note to survive.

    Returns:
        A new list of the same length as *sequence*.
    """
    from itertools import groupby

    cleaned = []
    for note, run in groupby(sequence):
        run_length = sum(1 for _ in run)
        keep = note is not None and run_length >= min_note_length
        cleaned.extend([note if keep else None] * run_length)
    return cleaned
|
52 |
+
|
53 |
+
def _append_note_events(track, midi_sequence, ticks_per_frame):
    """Append note_on/note_off messages for a per-frame note sequence to *track*.

    Event positions are computed from the absolute frame index and converted
    to delta times, so rounding never accumulates and silent frames (entries
    equal to ``None``) become real gaps before the next ``note_on``.
    """
    last_note = None
    last_event_tick = 0
    for frame, note in enumerate(midi_sequence):
        if note == last_note:
            continue
        tick = round(frame * ticks_per_frame)
        delta = tick - last_event_tick
        if last_note is not None:
            track.append(Message('note_off', note=last_note, velocity=0, time=delta))
            # The note_on that may follow happens at the same instant.
            delta = 0
        if note is not None:
            # After silence, delta carries the length of the rest.
            track.append(Message('note_on', note=note, velocity=100, time=delta))
        last_event_tick = tick
        last_note = note

    if last_note is not None:
        # Close a note that is still sounding at the end of the sequence.
        end_tick = round(len(midi_sequence) * ticks_per_frame)
        track.append(Message('note_off', note=last_note, velocity=0, time=end_tick - last_event_tick))


def basic_to_midi(input_file, tempo_bpm=120):
    """Transcribe a monophonic melody from an audio file into ``output.mid``.

    The audio is pitch-tracked with ``piptrack``, reduced to one note per
    frame, cleaned of very short notes and written as a single-track MIDI
    file at the given tempo.

    Args:
        input_file: Path of the audio file to load.
        tempo_bpm: Tempo written to the MIDI file, in beats per minute.

    Returns:
        The path of the written MIDI file, always ``"output.mid"``.

    Raises:
        ValueError: If *tempo_bpm* is not a positive number.
    """
    if tempo_bpm is None or tempo_bpm <= 0:
        raise ValueError("tempo_bpm must be a positive number")

    hop_length = 512
    ticks_per_beat = 480

    wav, sr = librosa_load(input_file)
    pitches, magnitudes = piptrack(y=wav, sr=sr, hop_length=hop_length)
    midi_sequence = clean_midi_sequence(smooth_pitch_sequence(pitches, magnitudes))

    # Each analysis frame advances hop_length samples, i.e. hop_length / sr
    # seconds. Deriving the frame length this way does not depend on how many
    # frames survive cleaning and cannot divide by zero on empty audio.
    ticks_per_second = (ticks_per_beat * tempo_bpm) / 60
    ticks_per_frame = ticks_per_second * hop_length / sr

    midi_file = MidiFile(ticks_per_beat=ticks_per_beat)
    track = MidiTrack()
    midi_file.tracks.append(track)
    track.append(MetaMessage('set_tempo', tempo=bpm2tempo(tempo_bpm)))
    _append_note_events(track, midi_sequence, ticks_per_frame)

    midi_file.save("output.mid")
    return "output.mid"
|
82 |
+
|
83 |
+
def spotify_to_midi(input_audio_path, tempo=120):
    """Transcribe audio with Basic Pitch and write it to ``output.mid``.

    The MIDI produced by ``basic_pitch_predict`` is written to disk, reloaded
    with mido, stripped of every ``program_change`` message and saved again.

    Args:
        input_audio_path: Path of the audio file to transcribe.
        tempo: Passed to Basic Pitch as ``midi_tempo``.

    Returns:
        The path of the written MIDI file, always ``"output.mid"``.
    """
    _, midi_data, _ = basic_pitch_predict(input_audio_path, midi_tempo=tempo)
    midi_data.write("output.mid")

    mid = MidiFile("output.mid")
    for index, track in enumerate(mid.tracks):
        # Rebuild as a MidiTrack (not a plain list) so the file keeps real
        # track objects.
        kept = MidiTrack()
        carried_time = 0
        for msg in track:
            if msg.type == 'program_change':
                # Times are deltas: hand the removed message's delta to the
                # next kept message so later events do not shift earlier.
                carried_time += msg.time
                continue
            if carried_time:
                msg = msg.copy(time=msg.time + carried_time)
                carried_time = 0
            kept.append(msg)
        mid.tracks[index] = kept

    mid.save("output.mid")
    return "output.mid"
|
91 |
+
|
92 |
+
|
93 |
+
import gradio as gr

# Tab 1: piptrack-based monophonic transcription.
basic_tab = gr.Interface(
    basic_to_midi,
    [gr.Audio(type="filepath", label="Input Audio"), gr.Number(120, label="BPM")],
    gr.File(label="Midi File"),
)

# Tab 2: Basic Pitch transcription.
medium_tab = gr.Interface(
    spotify_to_midi,
    [gr.Audio(type="filepath", label="Input Audio"), gr.Number(120, label="BPM")],
    gr.File(label="Midi File"),
)

# Tab 3: violin transcription model. Config and weights are downloaded from
# the Hub; the config file is read inside a context manager so its handle is
# closed instead of being left open for the life of the process.
with open(hf_hub_download("shethjenil/Audio2ViolinMidi", "violin.json")) as violin_config_file:
    violin_config = json_load(violin_config_file)
violin_model = PretrainedModel(
    violin_config,
    hf_hub_download("shethjenil/Audio2ViolinMidi", "violin_model.pt"),
    device,
)
advance_tab = gr.Interface(
    violin_model.transcribe_music,
    [
        gr.Audio(label="Upload your Audio file", type="filepath"),
        gr.Number(32, label="Batch size"),
        gr.Radio(["spotify", "rebab", "tiktok"], value="spotify", label="Post Processing"),
    ],
    gr.File(label="Download MIDI file"),
)

# Tab 4: Pop2Piano generation.
more_advance_tab = gr.Interface(
    Pop2Piano(device).audio2midi,
    [
        gr.Audio(label="Input Audio", type="filepath"),
        gr.Number(1, minimum=1, maximum=21, label="Composer"),
        gr.Number(2, label="Details in Piano"),
        gr.Number(1, label="Efficiency of Piano"),
        gr.Radio([1, 2, 4], label="steps per beat", value=2),
    ],
    gr.File(label="MIDI File"),
)

demo = gr.TabbedInterface(
    [basic_tab, medium_tab, advance_tab, more_advance_tab],
    ["Basic", "Medium", "Advance", "More Advance"],
)
# Launched at import time, as before, so running this file starts the app.
demo.launch()
|
95 |
+
|
requirements.txt
ADDED
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
numpy
|
2 |
+
scipy
|
3 |
+
numba
|
4 |
+
matplotlib
|
5 |
+
librosa
|
6 |
+
mido
|
7 |
+
torchaudio
|
8 |
+
mir_eval
|
9 |
+
pretty_midi @ git+https://github.com/craffel/pretty-midi.git
|
10 |
+
basic-pitch[coreml,tf,onnx]
|
11 |
+
torch
|
12 |
+
huggingface-hub
|
13 |
+
essentia
|
14 |
+
resampy
|
15 |
+
transformers==4.45.2
|
utils_violin_transcript.py
ADDED
The diff for this file is too large to render.
See raw diff
|
|