shethjenil committed
Commit f8b3075 · verified · 1 parent: 56959b4

Upload 3 files

Files changed (3)
  1. app.py +115 -0
  2. requirements.txt +15 -0
  3. utils_violin_transcript.py +0 -0
app.py ADDED
@@ -0,0 +1,115 @@
+ from utils_violin_transcript import PretrainedModel
+ from json import load as json_load
+ from huggingface_hub import hf_hub_download
+ from torch import device as Device
+ from torch.cuda import is_available as cuda_is_available
+ from transformers import Pop2PianoForConditionalGeneration, Pop2PianoProcessor
+ from librosa import load as librosa_load, piptrack, hz_to_midi
+ from mido import MidiFile, MidiTrack, Message, MetaMessage, bpm2tempo
+ from basic_pitch.inference import predict as basic_pitch_predict
+ from numpy import argmax as np_argmax, isnan as np_isnan
+
+ device = Device("cuda" if cuda_is_available() else "cpu")
+
+ class Pop2Piano:
+     def __init__(self, device: Device, model_id_path="sweetcocoa/pop2piano"):
+         self.model = Pop2PianoForConditionalGeneration.from_pretrained(model_id_path).to(device)
+         self.processor = Pop2PianoProcessor.from_pretrained(model_id_path)
+
+     def audio2midi(self, input, composer, num_bars, num_beams, steps_per_beat):
+         data, sr = librosa_load(input, sr=None)
+         inputs = self.processor(data, sr, steps_per_beat, return_tensors="pt", num_bars=num_bars)
+         tokens = self.model.generate(num_beams=num_beams, do_sample=True, input_features=inputs["input_features"], composer="composer" + str(composer))
+         # batch_decode yields pretty_midi objects; write the first one to disk
+         self.processor.batch_decode(tokens, inputs)["pretty_midi_objects"][0].write("output.mid")
+         return "output.mid"
+
+ def smooth_pitch_sequence(pitches, magnitudes, threshold=0.1):
+     # Pick the strongest pitch per frame; frames below the magnitude threshold become rests
+     midi_sequence = []
+     for i in range(pitches.shape[1]):
+         index = np_argmax(magnitudes[:, i])
+         pitch_mag = magnitudes[index, i]
+         pitch = pitches[index, i]
+         if pitch_mag < threshold or np_isnan(pitch) or pitch <= 0:
+             midi_sequence.append(None)
+         else:
+             midi_sequence.append(int(round(hz_to_midi(pitch))))
+     return midi_sequence
+
+ def clean_midi_sequence(sequence, min_note_length=2):
+     # Drop note runs shorter than min_note_length frames (likely pitch-tracking noise)
+     cleaned = []
+     current_note = None
+     count = 0
+     for note in sequence + [None]:
+         if note == current_note:
+             count += 1
+         else:
+             if current_note is not None and count >= min_note_length:
+                 cleaned.extend([current_note] * count)
+             else:
+                 cleaned.extend([None] * count)
+             current_note = note
+             count = 1
+     return cleaned
+
+ def basic_to_midi(input_file, tempo_bpm=120):
+     wav, sr = librosa_load(input_file)
+     audio_duration = len(wav) / sr
+     pitches, magnitudes = piptrack(y=wav, sr=sr, hop_length=512)
+     midi_sequence = clean_midi_sequence(smooth_pitch_sequence(pitches, magnitudes))
+     total_frames = len(midi_sequence)
+     ticks_per_beat = 480
+     tempo = bpm2tempo(tempo_bpm)
+     ticks_per_second = (ticks_per_beat * tempo_bpm) / 60
+     time_per_frame = max(1, round((audio_duration * ticks_per_second) / total_frames))
+     midi_file = MidiFile(ticks_per_beat=ticks_per_beat)
+     track = MidiTrack()
+     midi_file.tracks.append(track)
+     track.append(MetaMessage('set_tempo', tempo=tempo))
+     last_note = None
+     duration = 0
+     for note in midi_sequence:
+         if note != last_note:
+             if last_note is not None:
+                 track.append(Message('note_off', note=last_note, velocity=0, time=duration))
+                 duration = 0
+             if note is not None:
+                 # any accumulated rest time becomes the note_on delta
+                 track.append(Message('note_on', note=note, velocity=100, time=duration))
+                 duration = 0
+             last_note = note
+         duration += time_per_frame
+     if last_note is not None:
+         track.append(Message('note_off', note=last_note, velocity=0, time=duration))
+     midi_file.save("output.mid")
+     return "output.mid"
+
+ def spotify_to_midi(input_audio_path, tempo=120):
+     _, midi_data, _ = basic_pitch_predict(input_audio_path, midi_tempo=tempo)
+     midi_data.write("output.mid")
+     # Strip program_change messages so players fall back to the default instrument
+     mid = MidiFile("output.mid")
+     for i, track in enumerate(mid.tracks):
+         mid.tracks[i] = MidiTrack(msg for msg in track if msg.type != 'program_change')
+     mid.save("output.mid")
+     return "output.mid"
+
+ import gradio as gr
+
+ violin_model = PretrainedModel(
+     json_load(open(hf_hub_download("shethjenil/Audio2ViolinMidi", "violin.json"))),
+     hf_hub_download("shethjenil/Audio2ViolinMidi", "violin_model.pt"),
+     device,
+ )
+
+ gr.TabbedInterface(
+     [
+         gr.Interface(basic_to_midi, [gr.Audio(type="filepath", label="Input Audio"), gr.Number(120, label="BPM")], gr.File(label="Midi File")),
+         gr.Interface(spotify_to_midi, [gr.Audio(type="filepath", label="Input Audio"), gr.Number(120, label="BPM")], gr.File(label="Midi File")),
+         gr.Interface(violin_model.transcribe_music, [gr.Audio(label="Upload your Audio file", type="filepath"), gr.Number(32, label="Batch size"), gr.Radio(["spotify", "rebab", "tiktok"], value="spotify", label="Post Processing")], gr.File(label="Download MIDI file")),
+         gr.Interface(Pop2Piano(device).audio2midi, [gr.Audio(label="Input Audio", type="filepath"), gr.Number(1, minimum=1, maximum=21, label="Composer"), gr.Number(2, label="Details in Piano"), gr.Number(1, label="Efficiency of Piano"), gr.Radio([1, 2, 4], label="Steps per beat", value=2)], gr.File(label="MIDI File")),
+     ],
+     ["Basic", "Medium", "Advanced", "More Advanced"],
+ ).launch()
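A minimal sketch of driving the transcribers without the web UI, assuming the helper functions are factored out of app.py into a hypothetical transcribers.py module (importing app.py as-is would start the Gradio interface, since .launch() runs at module level):

    # Hypothetical standalone usage; transcribers.py is an assumption, not part of this commit.
    from transcribers import basic_to_midi, spotify_to_midi

    midi_path = basic_to_midi("input.wav", tempo_bpm=120)   # piptrack-based ("Basic" tab)
    # midi_path = spotify_to_midi("input.wav", tempo=120)   # basic-pitch ("Medium" tab)
    print(midi_path)  # both helpers write to and return "output.mid"

Note that every transcriber writes the same output.mid in the working directory, so successive calls overwrite each other's result.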
requirements.txt ADDED
@@ -0,0 +1,15 @@
+ numpy
+ scipy
+ numba
+ matplotlib
+ librosa
+ mido
+ torchaudio
+ mir_eval
+ pretty_midi @ git+https://github.com/craffel/pretty-midi.git
+ basic-pitch[coreml,tf,onnx]
+ torch
+ huggingface-hub
+ essentia
+ resampy
+ transformers==4.45.2
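Assuming a standard Python environment, these dependencies install with pip install -r requirements.txt, after which python app.py starts the Gradio interface.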
utils_violin_transcript.py ADDED
The diff for this file is too large to render. See raw diff