"""Gradio demo: real-time music generation with the riffusion-musiccaps model.

A text prompt is diffused into a spectrogram image, which is then converted
to a playable WAV via ``wav_bytes_from_spectrogram_image``.
"""
import gradio as gr
import torch  # NOTE(review): not used directly; presumably needed as diffusers' backend — confirm
from spectro import wav_bytes_from_spectrogram_image
from diffusers import DiffusionPipeline

# CPU inference keeps the demo runnable without a GPU (slow but portable).
device = "cpu"
MODEL_ID = "Hyeon2/riffusion-musiccaps"

# Load the pretrained pipeline once at startup; every request reuses it.
pipe = DiffusionPipeline.from_pretrained(MODEL_ID)
pipe = pipe.to(device)


def predict(prompt):
    """Generate a spectrogram image and a WAV file from a musical text prompt.

    Args:
        prompt: Free-text musical description (e.g. "A LoFi beat").

    Returns:
        Tuple of (grayscale PIL spectrogram image, path to the written WAV file).
    """
    # The pipeline emits an RGB spectrogram; the WAV converter expects grayscale ("L").
    spec = pipe(prompt).images[0].convert("L")
    # wav_bytes_from_spectrogram_image returns a tuple whose first element is
    # an in-memory WAV buffer; persist it so gr.Audio can serve it by filepath.
    wav = wav_bytes_from_spectrogram_image(spec)
    with open("output.wav", "wb") as f:
        f.write(wav[0].getbuffer())
    return spec, "output.wav"


title = """

Riffusion-Musiccaps real-time music generation

Describe a musical prompt, generate music by getting a spectrogram image & sound.

"""

with gr.Blocks(css="style.css") as demo:
    with gr.Column(elem_id="col-container"):
        gr.HTML(title)
        with gr.Row():
            with gr.Column():
                prompt_input = gr.Textbox(
                    placeholder="A LoFi beat",
                    label="Musical prompt",
                    elem_id="prompt-in",
                )
                send_btn = gr.Button(
                    value="Get a new spectrogram!",
                    elem_id="submit-btn",
                )
            with gr.Column(elem_id="col-container-2"):
                spectrogram_output = gr.Image(
                    label="Spectrogram Image Result",
                    elem_id="img-out",
                )
                sound_output = gr.Audio(
                    type="filepath",
                    label="Generated Audio",
                    elem_id="music-out",
                )
        # Wire the button to inference; outputs fill the image and audio widgets.
        send_btn.click(
            predict,
            inputs=[prompt_input],
            outputs=[spectrogram_output, sound_output],
        )

# Queue requests (CPU inference is slow); ssr_mode=False avoids SSR issues on Spaces.
demo.queue(max_size=250).launch(debug=True, ssr_mode=False)