In this tutorial, we will use an example to show you how to split a big wave file into clips (small wave files) of the same length. For example, given a wave file that is 500 seconds long, we may want to split it into small clips that each last 5 seconds. How can we do this?
In PyTorch, we can use the example code below:
import torch
import torchaudio
def load_audio(wave_file):
    """Load an audio file with ``torchaudio.load``.

    Args:
        wave_file: Audio source handed straight to ``torchaudio.load``.

    Returns:
        The two values produced by ``torchaudio.load``, in order: the
        waveform tensor and the sample rate.
    """
    waveform, sample_rate = torchaudio.load(wave_file)
    return waveform, sample_rate
def save_audio(wave_data, wave_file):
    """Serialize a copy of ``wave_data`` to ``wave_file`` with ``torch.save``.

    The output is a PyTorch serialized tensor, not an encoded audio file.

    Args:
        wave_data: Tensor to store.
        wave_file: Destination passed to ``torch.save`` (a path or a
            writable binary file-like object).
    """
    # Save a clone rather than the tensor itself. Presumably this is so a clip
    # that is a view from ``torch.split`` is written without the rest of the
    # storage it shares with its siblings -- NOTE(review): confirm intent.
    snapshot = wave_data.clone()
    torch.save(
        snapshot,
        wave_file,
        _use_new_zipfile_serialization=True,
    )
def get_audio_clips(wave_file, min_sec=5):
    """Split an audio file into clips that are each ``min_sec`` seconds long.

    The waveform returned by ``load_audio`` is cut along dim 1 into pieces of
    ``min_sec * sr`` samples. When the audio does not fill a whole number of
    clips, it is extended by repeating itself from the start, so every
    returned clip has exactly the same length. Audio shorter than one clip is
    looped until it fills a single clip. Audio whose length is already an
    exact multiple of the clip length is split as-is, without an extra
    wrapped-around clip.

    Args:
        wave_file: Audio file to split; passed to ``load_audio``.
        min_sec: Duration of each clip in seconds. May be fractional.

    Returns:
        A tuple of tensors, one per clip, as produced by ``torch.split``.

    Raises:
        ValueError: If the clip length works out to less than one sample or
            the loaded audio contains no samples.
    """
    wave_data, sr = load_audio(wave_file)

    # Slice bounds and split sizes must be integers, so round the sample
    # count in case ``min_sec`` is fractional.
    clip_len = int(round(min_sec * sr))
    if clip_len <= 0:
        raise ValueError(
            f"clip length must be at least one sample, got min_sec={min_sec!r}"
        )

    wave_len = wave_data.shape[1]
    if wave_len == 0:
        raise ValueError("audio contains no samples")

    # Ceiling division: the number of clips needed to cover every sample.
    clip_num = -(-wave_len // clip_len)
    total_len = clip_num * clip_len

    if total_len > wave_len:
        # Pad by looping the audio: repeat it just enough times to reach
        # ``total_len`` samples, then trim the excess.
        repeat_num = -(-total_len // wave_len)
        wave_data = wave_data.repeat(1, repeat_num)[:, :total_len]

    return torch.split(wave_data, clip_len, dim=1)
- import torch
- import torchaudio
- def load_audio(wave_file):
- wave_data, sr = torchaudio.load(wave_file)
- return wave_data, sr
- def save_audio(wave_data, wave_file):
- torch.save(wave_data.clone(), wave_file, _use_new_zipfile_serialization=True)
- def get_audio_clips(wave_file, min_sec = 5):
- wave_data, sr = load_audio(wave_file)
- min_clip_len = min_sec * sr
- wave_len = wave_data.shape[1]
- if wave_len < min_clip_len:
- repeat_num = min_clip_len // wave_len + 1
- wave_data = wave_data.repeat(1, repeat_num)
- wave_data = wave_data[:, :min_clip_len]
- else:
- clip_num = wave_len // min_clip_len + 1
- max_wave_len = clip_num * min_clip_len
- wave_data = wave_data.repeat(1, 2)
- wave_data = wave_data[:, :max_wave_len]
- feats = torch.split(wave_data, min_clip_len, dim = 1)
- return feats
import torch
import torchaudio
def load_audio(wave_file):
    """Load an audio file with ``torchaudio.load``.

    Args:
        wave_file: Audio source handed straight to ``torchaudio.load``.

    Returns:
        The two values produced by ``torchaudio.load``, in order: the
        waveform tensor and the sample rate.
    """
    waveform, sample_rate = torchaudio.load(wave_file)
    return waveform, sample_rate
def save_audio(wave_data, wave_file):
    """Serialize a copy of ``wave_data`` to ``wave_file`` with ``torch.save``.

    The output is a PyTorch serialized tensor, not an encoded audio file.

    Args:
        wave_data: Tensor to store.
        wave_file: Destination passed to ``torch.save`` (a path or a
            writable binary file-like object).
    """
    # Save a clone rather than the tensor itself. Presumably this is so a clip
    # that is a view from ``torch.split`` is written without the rest of the
    # storage it shares with its siblings -- NOTE(review): confirm intent.
    snapshot = wave_data.clone()
    torch.save(
        snapshot,
        wave_file,
        _use_new_zipfile_serialization=True,
    )
def get_audio_clips(wave_file, min_sec=5):
    """Split an audio file into clips that are each ``min_sec`` seconds long.

    The waveform returned by ``load_audio`` is cut along dim 1 into pieces of
    ``min_sec * sr`` samples. When the audio does not fill a whole number of
    clips, it is extended by repeating itself from the start, so every
    returned clip has exactly the same length. Audio shorter than one clip is
    looped until it fills a single clip. Audio whose length is already an
    exact multiple of the clip length is split as-is, without an extra
    wrapped-around clip.

    Args:
        wave_file: Audio file to split; passed to ``load_audio``.
        min_sec: Duration of each clip in seconds. May be fractional.

    Returns:
        A tuple of tensors, one per clip, as produced by ``torch.split``.

    Raises:
        ValueError: If the clip length works out to less than one sample or
            the loaded audio contains no samples.
    """
    wave_data, sr = load_audio(wave_file)

    # Slice bounds and split sizes must be integers, so round the sample
    # count in case ``min_sec`` is fractional.
    clip_len = int(round(min_sec * sr))
    if clip_len <= 0:
        raise ValueError(
            f"clip length must be at least one sample, got min_sec={min_sec!r}"
        )

    wave_len = wave_data.shape[1]
    if wave_len == 0:
        raise ValueError("audio contains no samples")

    # Ceiling division: the number of clips needed to cover every sample.
    clip_num = -(-wave_len // clip_len)
    total_len = clip_num * clip_len

    if total_len > wave_len:
        # Pad by looping the audio: repeat it just enough times to reach
        # ``total_len`` samples, then trim the excess.
        repeat_num = -(-total_len // wave_len)
        wave_data = wave_data.repeat(1, repeat_num)[:, :total_len]

    return torch.split(wave_data, clip_len, dim=1)
We can use this code as follows:
# Split the example recording into clips using the default clip length.
wave_file = "music-jamendo-0039.wav"
feats = get_audio_clips(wave_file)

# Print how many clips were produced, then the shape of each one.
print(len(feats))
for clip in feats:
    print(clip.shape)
- wave_file = "music-jamendo-0039.wav"
- feats = get_audio_clips(wave_file)
- print(len(feats))
- for feat in feats:
- print(feat.shape)
# Split the example recording into clips using the default clip length.
wave_file = "music-jamendo-0039.wav"
feats = get_audio_clips(wave_file)

# Print how many clips were produced, then the shape of each one.
print(len(feats))
for clip in feats:
    print(clip.shape)
In this code, we split music-jamendo-0039.wav into 5-second clips, each returned as a tensor. Running this code, we will see:
65
torch.Size([1, 80000])
torch.Size([1, 80000])
torch.Size([1, 80000])
torch.Size([1, 80000])
torch.Size([1, 80000])
torch.Size([1, 80000])
torch.Size([1, 80000])
- 65
- torch.Size([1, 80000])
- torch.Size([1, 80000])
- torch.Size([1, 80000])
- torch.Size([1, 80000])
- torch.Size([1, 80000])
- torch.Size([1, 80000])
- torch.Size([1, 80000])
65
torch.Size([1, 80000])
torch.Size([1, 80000])
torch.Size([1, 80000])
torch.Size([1, 80000])
torch.Size([1, 80000])
torch.Size([1, 80000])
torch.Size([1, 80000])
To understand torch.split() and torchaudio.load(), you can see:
Understand torch.split(): Split Tensor into Chunks – PyTorch Tutorial
Understand torchaudio.load(): Read Audio with Examples – TorchAudio Tutorial