-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathDataAugment_Librosa.py
More file actions
99 lines (85 loc) · 3.68 KB
/
DataAugment_Librosa.py
File metadata and controls
99 lines (85 loc) · 3.68 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
import os
import random
import numpy as np
import librosa
import soundfile as sf
from glob import glob
def get_augmented_filename(original_file, global_number):
base_name = os.path.splitext(os.path.basename(original_file))[0]
if '_' in base_name:
parts = base_name.rsplit('_', 1)
category = parts[0]
orig_index = parts[1]
else:
category = base_name
orig_index = "00"
try:
orig_index_int = int(orig_index)
except ValueError:
orig_index_int = 0
new_filename = f"{category}_{global_number:02d}_aug{orig_index_int:02d}.wav"
return new_filename
def time_stretch_audio(y, rate):
D = librosa.stft(y)
D_stretch = librosa.phase_vocoder(D, rate=rate)
return librosa.istft(D_stretch)
def spectral_perturbation_audio(y, sr, noise_factor=0.008):
D = librosa.stft(y)
magnitude, phase = np.abs(D), np.angle(D)
noise = noise_factor * np.random.randn(*magnitude.shape)
magnitude_noisy = magnitude + noise
D_noisy = magnitude_noisy * np.exp(1j * phase)
return librosa.istft(D_noisy)
def augment_audio_instance(y, sr):
method = random.choice(['time_stretch', 'spectral_perturb'])
if method == 'time_stretch':
rate = random.uniform(0.8, 1.2)
return time_stretch_audio(y, rate)
else:
noise_factor = random.uniform(0.001, 0.01)
return spectral_perturbation_audio(y, sr, noise_factor)
def parse_category_and_drum(file_name):
parts = file_name.split('_')
if len(parts) >= 2:
return parts[0], parts[1]
return "Unknown", "Unknown"
def balance_and_augment(input_dir, output_dir, target_count=30):
audio_files = sorted(glob(os.path.join(input_dir, '**', '*.wav'), recursive=True))
print(f"总共发现音频文件:{len(audio_files)}")
# 按 (drummachine, category) 分类
category_dict = {}
for file in audio_files:
base = os.path.basename(file)
cat, drum = parse_category_and_drum(base)
key = (drum, cat)
category_dict.setdefault(key, []).append(file)
for (drum, cat), files in category_dict.items():
output_subdir = os.path.join(output_dir, drum, cat)
os.makedirs(output_subdir, exist_ok=True)
print(f"处理: {drum}/{cat},已有样本: {len(files)}")
# 保存原始文件
for file in files:
y, sr = librosa.load(file, sr=None)
name = os.path.basename(file)
out_path = os.path.join(output_subdir, name)
sf.write(out_path, y, sr, subtype='PCM_16')
# 增强补齐
current_count = len(files)
aug_index = 1
while current_count < target_count:
file = random.choice(files)
y, sr = librosa.load(file, sr=None)
y_aug = augment_audio_instance(y, sr)
global_number = current_count + 1
new_filename = get_augmented_filename(file, global_number)
out_path = os.path.join(output_subdir, new_filename)
sf.write(out_path, y_aug, sr, subtype='PCM_16')
print(f"[+] 增强并保存: {new_filename}")
current_count += 1
aug_index += 1
print(f"[✓] 处理完成: {drum}/{cat}(共 {current_count} 个样本)")
if __name__ == "__main__":
input_dir = "/Users/rifi_2001/Documents/UOE Sem-2/Audio Machine Learning/Audio Datasets/In-Use/Linn_RawData/Perc" # 修改为你的输入文件夹路径
output_dir = "/Users/rifi_2001/Documents/UOE Sem-2/Audio Machine Learning/Audio Datasets/In-Use/Linn_RawDataNew/Perc" # 修改为你的输出路径
target_count = 150 # 每个类别希望达到的最小样本数量
balance_and_augment(input_dir, output_dir, target_count)