# Hugging Face Space (status: Paused)
from utils_violin_transcript import PretrainedModel | |
from json import load as json_load | |
from huggingface_hub import hf_hub_download | |
from torch import device as Device | |
from torch.cuda import is_available as cuda_is_available | |
from transformers import Pop2PianoForConditionalGeneration, Pop2PianoProcessor | |
from librosa import load as librosa_load, piptrack, hz_to_midi | |
from mido import MidiFile, MidiTrack, Message, MetaMessage, bpm2tempo | |
from basic_pitch.inference import predict as basic_pitch_predict | |
from numpy import argmax as np_argmax, isnan as np_isnan | |
# Choose the compute device once at import time: CUDA when torch reports it
# as available, otherwise the CPU.
if cuda_is_available():
    device = Device("cuda")
else:
    device = Device("cpu")
class Pop2Piano:
    """Turn an audio file into a piano MIDI file with a Pop2Piano checkpoint."""

    def __init__(self, device: Device, model_id_path="sweetcocoa/pop2piano"):
        """Load the generation model and its processor.

        Args:
            device: Torch device the model weights are moved to.
            model_id_path: Identifier or path handed to ``from_pretrained``
                for both the model and the processor.
        """
        self.model = Pop2PianoForConditionalGeneration.from_pretrained(model_id_path).to(device)
        self.processor = Pop2PianoProcessor.from_pretrained(model_id_path)

    def audio2midi(self, input, composer, num_bars, num_beams, steps_per_beat):
        """Generate a piano arrangement for ``input`` and save it as MIDI.

        Args:
            input: Path of the audio file to load.
            composer: Number appended to ``"composer"`` to build the composer
                name passed to ``generate``.
            num_bars: Forwarded to the processor as ``num_bars``.
            num_beams: Forwarded to ``generate`` as ``num_beams``.
            steps_per_beat: Forwarded to the processor as ``steps_per_beat``.

        Returns:
            The path of the written file, always ``"output.mid"``.
        """
        # Numeric UI widgets may hand whole numbers over as floats; without
        # the cast the composer name would become e.g. "composer1.0".
        composer = int(composer)
        num_bars = int(num_bars)
        num_beams = int(num_beams)
        steps_per_beat = int(steps_per_beat)

        data, sr = librosa_load(input, sr=None)
        inputs = self.processor(data, sr, steps_per_beat, return_tensors="pt", num_bars=num_bars)

        # The features are created on the CPU; move them to wherever the model
        # lives so generation also works when the model is on a GPU.
        token_ids = self.model.generate(
            num_beams=num_beams,
            do_sample=True,
            input_features=inputs["input_features"].to(self.model.device),
            composer=f"composer{composer}",
        )

        # Decode on the CPU next to the (CPU) processor output.
        decoded = self.processor.batch_decode(token_ids.cpu(), inputs)

        # Context manager so the output handle is flushed and closed.
        with open("output.mid", "wb") as midi_out:
            decoded["pretty_midi_objects"][0].write(midi_out)
        return "output.mid"
def smooth_pitch_sequence(pitches, magnitudes, threshold=0.1):
    """Pick one MIDI note (or ``None``) per frame from pitch-tracking output.

    Both arrays are indexed as ``[bin, frame]``. For every frame the bin with
    the largest magnitude is selected; the frame becomes ``None`` when that
    magnitude is below ``threshold`` or the selected pitch is NaN or not
    positive. Otherwise the pitch is converted with ``hz_to_midi`` and rounded
    to the nearest integer.

    Args:
        pitches: 2-D array of pitch values in Hz.
        magnitudes: 2-D array of magnitudes, same layout as ``pitches``.
        threshold: Minimum magnitude for a frame to yield a note.

    Returns:
        A list with one entry per frame: an ``int`` MIDI note or ``None``.
    """

    def note_for_frame(frame):
        strongest_bin = np_argmax(magnitudes[:, frame])
        strength = magnitudes[strongest_bin, frame]
        frequency = pitches[strongest_bin, frame]
        if strength < threshold or np_isnan(frequency) or frequency <= 0:
            return None
        return int(round(hz_to_midi(frequency)))

    frame_count = pitches.shape[1]
    return [note_for_frame(frame) for frame in range(frame_count)]
def clean_midi_sequence(sequence, min_note_length=2):
    """Blank out notes that are held for fewer than ``min_note_length`` frames.

    The sequence is split into runs of equal consecutive values. A run of a
    note that is at least ``min_note_length`` long is kept; every other run
    (short notes and silence) is emitted as ``None``. The result always has
    the same length as the input, including any trailing silence, so frame
    indices keep their meaning for callers that derive timing from them.

    Args:
        sequence: Iterable of MIDI note numbers, with ``None`` for silence.
        min_note_length: Minimum run length, in frames, for a note to survive.

    Returns:
        A new list of the same length with short notes replaced by ``None``.
    """
    from itertools import groupby

    cleaned = []
    for note, run in groupby(sequence):
        run_length = sum(1 for _ in run)
        keep = note is not None and run_length >= min_note_length
        cleaned.extend([note if keep else None] * run_length)
    return cleaned
def basic_to_midi(input_file, tempo_bpm=120):
    """Transcribe a monophonic pitch track of ``input_file`` to ``output.mid``.

    The audio is loaded, pitch-tracked with ``piptrack``, reduced to one note
    per frame by ``smooth_pitch_sequence`` and filtered by
    ``clean_midi_sequence``. Every change of note becomes a ``note_off`` /
    ``note_on`` pair in a single-track MIDI file.

    Event times are derived from absolute frame positions (frame index times
    hop length over sample rate), so rounding does not accumulate over the
    file, silent stretches delay the next ``note_on`` instead of being folded
    into the following note, and an empty frame sequence produces an empty
    track instead of a division by zero.

    Args:
        input_file: Path of the audio file to transcribe.
        tempo_bpm: Tempo written to the file and used to convert seconds
            to ticks.

    Returns:
        The path of the written file, always ``"output.mid"``.
    """
    hop_length = 512
    ticks_per_beat = 480

    wav, sr = librosa_load(input_file)
    pitches, magnitudes = piptrack(y=wav, sr=sr, hop_length=hop_length)
    midi_sequence = clean_midi_sequence(smooth_pitch_sequence(pitches, magnitudes))

    tempo = bpm2tempo(tempo_bpm)
    ticks_per_second = (ticks_per_beat * tempo_bpm) / 60
    ticks_per_frame = (hop_length / sr) * ticks_per_second

    midi_file = MidiFile(ticks_per_beat=ticks_per_beat)
    track = MidiTrack()
    midi_file.tracks.append(track)
    track.append(MetaMessage('set_tempo', tempo=tempo))

    last_note = None
    last_event_tick = 0
    for frame, note in enumerate(midi_sequence):
        if note == last_note:
            continue
        now = int(round(frame * ticks_per_frame))
        # MIDI times are deltas from the previous event; only the first
        # message written for this frame carries the elapsed ticks.
        delta = now - last_event_tick
        if last_note is not None:
            track.append(Message('note_off', note=last_note, velocity=0, time=delta))
            delta = 0
        if note is not None:
            track.append(Message('note_on', note=note, velocity=100, time=delta))
        # Every transition writes at least one message, so the reference
        # point for the next delta always moves to this frame.
        last_event_tick = now
        last_note = note

    # Close a note that is still sounding at the end of the audio.
    if last_note is not None:
        end_tick = int(round(len(midi_sequence) * ticks_per_frame))
        track.append(Message('note_off', note=last_note, velocity=0, time=end_tick - last_event_tick))

    midi_file.save("output.mid")
    return "output.mid"
def spotify_to_midi(input_audio_path, tempo=120):
    """Transcribe audio with basic-pitch and save it without program changes.

    The prediction's MIDI data is written to ``output.mid``, reloaded with
    mido, stripped of every ``program_change`` message and saved again.

    Each track stays a ``MidiTrack`` (not a plain list), and the delta time
    of a removed message is added to the next kept message so later events
    keep their absolute position.

    Args:
        input_audio_path: Path of the audio file to transcribe.
        tempo: Passed to the predictor as ``midi_tempo``.

    Returns:
        The path of the written file, always ``"output.mid"``.
    """
    _, midi_data, _ = basic_pitch_predict(input_audio_path, midi_tempo=tempo)
    midi_data.write("output.mid")

    mid = MidiFile("output.mid")
    for index, track in enumerate(mid.tracks):
        kept = MidiTrack()
        carried_ticks = 0
        for msg in track:
            if msg.type == 'program_change':
                # Remember the time this message consumed so it is not lost.
                carried_ticks += msg.time
                continue
            if carried_ticks:
                msg = msg.copy(time=msg.time + carried_ticks)
                carried_ticks = 0
            kept.append(msg)
        mid.tracks[index] = kept
    mid.save("output.mid")
    return "output.mid"
import gradio as gr

# Tab 1: pitch-tracking transcription.
basic_tab = gr.Interface(
    basic_to_midi,
    [gr.Audio(type="filepath", label="Input Audio"), gr.Number(120, label="BPM")],
    gr.File(label="Midi File"),
)

# Tab 2: basic-pitch transcription.
medium_tab = gr.Interface(
    spotify_to_midi,
    [gr.Audio(type="filepath", label="Input Audio"), gr.Number(120, label="BPM")],
    gr.File(label="Midi File"),
)

# Tab 3: violin transcription model. The JSON config is read inside a
# context manager so the file handle is closed once it has been parsed.
with open(hf_hub_download("shethjenil/Audio2ViolinMidi", "violin.json"), encoding="utf-8") as config_file:
    violin_config = json_load(config_file)
violin_model = PretrainedModel(
    violin_config,
    hf_hub_download("shethjenil/Audio2ViolinMidi", "violin_model.pt"),
    device,
)
advance_tab = gr.Interface(
    violin_model.transcribe_music,
    [
        gr.Audio(label="Upload your Audio file", type="filepath"),
        # precision=0 makes the widget deliver an int rather than a float.
        gr.Number(32, label="Batch size", precision=0),
        gr.Radio(["spotify", "rebab", "tiktok"], value="spotify", label="Post Processing"),
    ],
    gr.File(label="Download MIDI file"),
)

# Tab 4: Pop2Piano arrangement. The whole-number inputs use precision=0 so
# the handler receives ints.
more_advance_tab = gr.Interface(
    Pop2Piano(device).audio2midi,
    [
        gr.Audio(label="Input Audio", type="filepath"),
        gr.Number(1, minimum=1, maximum=21, label="Composer", precision=0),
        gr.Number(2, label="Details in Piano", precision=0),
        gr.Number(1, label="Efficiency of Piano", precision=0),
        gr.Radio([1, 2, 4], label="steps per beat", value=2),
    ],
    gr.File(label="MIDI File"),
)

gr.TabbedInterface(
    [basic_tab, medium_tab, advance_tab, more_advance_tab],
    ["Basic", "Medium", "Advance", "More Advance"],
).launch()