-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathpreprocessor.py
More file actions
89 lines (67 loc) · 2.38 KB
/
preprocessor.py
File metadata and controls
89 lines (67 loc) · 2.38 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
import os
import librosa
import matplotlib.pyplot as plt
import librosa.display
import numpy as np
from pysndfx import AudioEffectsChain
from scipy.signal import find_peaks
# Effects chain that trim() runs over the audio before looking for peaks.
# The stages are applied in the order they are added below.
# NOTE(review): argument meanings (gain, cutoff frequency, Q, dB) are
# presumed from the pysndfx method names -- confirm against its docs.
fx = AudioEffectsChain()
fx = fx.limiter(20.0)
fx = fx.lowpass(2500, 2)
fx = fx.highpass(100)
fx = fx.equalizer(300, db=15.0)
#denoise = AudioEffectsChain().lowpass(3000)
def _remove_until(l, until):
t = list(l)
n = len(t)
while n > until:
m = np.full((n, n), np.inf)
for i in range(n):
for j in range(i+1, n):
m[i, j] = t[j] - t[i]
to_adapt, to_remove = np.unravel_index(m.argmin(), m.shape)
t[to_adapt] = (t[to_adapt]+t[to_remove])/2
del t[to_remove]
n = len(t)
return t
def _extract_labels(filename):
return os.path.splitext(os.path.basename(filename))[0]
def trim(filename, output, SHOW_PLOTS=False):
    """Cut a recording into short clips centred on its strongest peaks.

    The file is loaded, normalised and stripped of leading/trailing
    silence.  Peaks are located on an effects-processed copy of the signal
    (see ``fx``) after zeroing every sample whose magnitude is below 0.5,
    and the closest peaks are merged until at most four remain.  For each
    remaining peak a window of ``sr`` samples on either side is cut from
    the unprocessed signal, trimmed again, padded by ``sr // 4`` samples,
    normalised and written as
    ``<output>/<name>/<index>-<name[index]>.wav`` where ``name`` is the
    input file's base name without extension.

    Args:
        filename: Path of the audio file to process.
        output: Directory under which the per-file sub-directory is
            created (missing parent directories are created as well).
        SHOW_PLOTS: When true, plot the waveforms with the detected peak
            positions and return without writing any file.

    Returns:
        None.

    Raises:
        IndexError: If the base name has fewer characters than there are
            peaks to write, since slice ``i`` is labelled ``name[i]``.
    """
    N_SLICES = 4

    y, sr = librosa.load(filename)
    margin = int(sr)  # samples kept on each side of a peak (sr samples)

    y = librosa.to_mono(y)
    y = librosa.util.normalize(y)
    y, _ = librosa.effects.trim(y, top_db=24, frame_length=2)

    # Peaks are searched on the processed copy; clips are cut from ``y``.
    # NOTE(review): fx is called without an explicit sample rate -- confirm
    # that pysndfx's default input rate is what is intended for ``sr``.
    fxy = fx(y)
    fxy[np.abs(fxy) < 0.5] = 0
    peaks, _ = find_peaks(fxy, height=0.5, distance=sr)
    peaks = _remove_until(peaks, N_SLICES)

    if SHOW_PLOTS:
        # Peak positions are sample indices at rate ``sr``; pass it
        # explicitly instead of relying on the library default.
        peak_times = librosa.samples_to_time(peaks, sr=sr)
        plt.title(filename)
        librosa.display.waveplot(fxy, color='m')
        librosa.display.waveplot(y, color='orange')
        plt.vlines(peak_times, -1, 1, color='k', linestyle='--', linewidth=6,
                   alpha=0.9, label='Segment boundaries')
        plt.show()
        return

    labels = _extract_labels(filename)
    out_dir = os.path.join(output, labels)
    # makedirs also creates a missing ``output`` directory and does not
    # fail if the target already exists.
    os.makedirs(out_dir, exist_ok=True)

    for i, p in enumerate(peaks[:N_SLICES]):
        # ``p`` may be a float when it is the mean of merged peaks.
        left = int(round(max(0, p - margin)))
        right = int(round(min(p + margin, len(y) - 1)))
        audio = y[left:right]
        if not np.any(audio):
            # Nothing but zeros in this window; skip it.
            continue

        # The returned interval is relative to ``audio``, so it is clamped
        # against ``audio`` rather than the full signal.
        _, (start, end) = librosa.effects.trim(audio, top_db=12, frame_length=2)
        start = int(round(max(0, start - margin // 4)))
        end = int(round(min(end + margin // 4, len(audio))))
        audio_trim = librosa.util.normalize(audio[start:end])

        clip_path = os.path.join(out_dir, '%d-%s.wav' % (i, labels[i]))
        librosa.output.write_wav(clip_path, audio_trim, sr=sr)
#trim('amostras/abn7.wav', 'output')