import gradio as gr
from transformers import AutoModel
from miditok import MusicTokenizer
import torch
import numpy as np
import pretty_midi
from io import BytesIO
import os
import requests

# URL of the SoundFont file to download
url = "https://raw.githubusercontent.com/urish/cinto/refs/heads/master/media/FluidR3%20GM.sf2"
# Local filename to save the file
filename = "FluidR3_GM.sf2"

# Download the SoundFont once if it is not already present
if not os.path.exists(filename):
    response = requests.get(url)
    response.raise_for_status()
    with open(filename, "wb") as file:
        file.write(response.content)


def score_to_audio(score, sample_rate: int = 44100) -> tuple[int, np.ndarray]:
    """
    Convert a symusic Score to integer PCM audio data.

    Args:
        score (Score): symusic Score object
        sample_rate (int): desired sample rate in Hz, defaults to 44100

    Returns:
        tuple[int, np.ndarray]: tuple of (sample_rate, audio data as int16)
    """
    # Get MIDI bytes and create a PrettyMIDI object
    midi_data = BytesIO(score.dumps_midi())
    pm = pretty_midi.PrettyMIDI(midi_data)

    # Synthesize to a float array first
    # float_audio = pm.synthesize(fs=sample_rate)
    float_audio = pm.fluidsynth(fs=sample_rate, sf2_path="./FluidR3_GM.sf2")

    # Convert to 16-bit integer PCM,
    # scaling to the full int16 range (-32768 to 32767)
    int_audio = (float_audio * 32767).astype(np.int16)
    int_audio = np.trim_zeros(int_audio, "b")

    return sample_rate, int_audio


device = "cuda" if torch.cuda.is_available() else "cpu"

tokenizer = MusicTokenizer.from_pretrained("shikhr/music_maker")
model = AutoModel.from_pretrained("shikhr/music_maker", trust_remote_code=True)
model.to(device)


def generate_music():
    # Generate some music
    out = model.generate(
        torch.tensor([[1]]).to(device),
        max_new_tokens=400,
        temperature=1.0,
        top_k=None,
    )

    # Decode the tokens and save the generated MIDI
    res = tokenizer(out[0].tolist())
    res.dump_midi("output.mid")
    nx = score_to_audio(res)
    # print(nx)
    return "Generated", nx


demo = gr.Interface(
    generate_music, inputs=[], outputs=["text", "audio"], flagging_mode="never"
)

if __name__ == "__main__":
    demo.launch()