import gradio as gr
import torch
from spectro import wav_bytes_from_spectrogram_image
from diffusers import DiffusionPipeline
# Pick the inference device. torch is already imported, so use it to detect
# CUDA rather than pinning the pipeline to the (slow) CPU unconditionally.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Hugging Face Hub id of the fine-tuned riffusion checkpoint.
MODEL_ID = "Hyeon2/riffusion-musiccaps"

# Download (or load from cache) the pretrained pipeline and move it to the
# selected device. This runs once at import time, before the UI starts.
pipe = DiffusionPipeline.from_pretrained(MODEL_ID)
pipe = pipe.to(device)
def predict(prompt, output_path="output.wav"):
    """Generate music from a text prompt via a riffusion spectrogram.

    Parameters
    ----------
    prompt : str
        Musical description fed to the diffusion pipeline.
    output_path : str, optional
        Path the reconstructed WAV file is written to (default "output.wav").

    Returns
    -------
    tuple
        (spectrogram PIL image, path of the written WAV file) — matches the
        (Image, Audio) Gradio outputs wired up below.
    """
    # Grayscale ("L") is the format the WAV reconstruction helper expects.
    spec = pipe(prompt).images[0].convert("L")
    # wav_bytes_from_spectrogram_image returns a tuple whose first element is
    # a BytesIO-like object holding the WAV payload — TODO confirm in spectro.
    wav = wav_bytes_from_spectrogram_image(spec)
    with open(output_path, "wb") as f:
        f.write(wav[0].getbuffer())
    return spec, output_path
# Header text shown at the top of the page (rendered via gr.HTML below).
# This is runtime UI content — do not reword casually.
title = """
Riffusion-Musiccaps real-time music generation
Describe a musical prompt, generate music by getting a spectrogram image & sound.
"""
# Build the Gradio UI: prompt textbox + submit button on the left, the
# generated spectrogram image and playable audio on the right.
with gr.Blocks(css="style.css") as demo:
    with gr.Column(elem_id="col-container"):
        gr.HTML(title)
        with gr.Row():
            with gr.Column():
                prompt_input = gr.Textbox(placeholder="A LoFi beat", label="Musical prompt", elem_id="prompt-in")
                send_btn = gr.Button(value="Get a new spectrogram!", elem_id="submit-btn")
            with gr.Column(elem_id="col-container-2"):
                spectrogram_output = gr.Image(label="Spectrogram Image Result", elem_id="img-out")
                # type='filepath' means predict must return a path string, not raw audio.
                sound_output = gr.Audio(type='filepath', label="Generated Audio", elem_id="music-out")
        # Wire the button: predict(prompt) -> (spectrogram image, wav filepath).
        send_btn.click(predict, inputs=[prompt_input], outputs=[spectrogram_output, sound_output])

# Queue requests (up to 250 waiting) since generation is slow; ssr_mode=False
# disables server-side rendering of the frontend.
demo.queue(max_size=250).launch(debug=True, ssr_mode=False)