Reputation: 149
Substitute three numpy for the audio and combine them to get the max-min average. I am getting an error with this, what should I do?
import torch
import torchaudio
import torchaudio.transforms as T
import os
import requests
import librosa
import matplotlib.pyplot as plt
# 音声の保存
_SAMPLE_DIR = "_sample_data"
SAMPLE_WAV_URL = "https://pytorch-tutorial-assets.s3.amazonaws.com/VOiCES_devkit/source-16k/train/sp0307/Lab41-SRI-VOiCES-src-sp0307-ch127535-sg0042.wav"
SAMPLE_WAV_PATH = os.path.join(_SAMPLE_DIR, "speech.wav")
def plot_spectrogram(spec, title=None, ylabel="freq_bin", aspect="auto", xmax=None):
fig, axs = plt.subplots(1, 1)
axs.set_title(title or "Spectrogram (db)")
axs.set_ylabel(ylabel)
axs.set_xlabel("frame")
im = axs.imshow(librosa.power_to_db(spec), origin="lower", aspect=aspect)
if xmax:
axs.set_xlim((0, xmax))
fig.colorbar(im, ax=axs)
plt.show(block=False)
def synthesis(sigList):
maxLength = 0
tmpLength = 0
tmpArray = []
#最大長の音声を探索する
for i, data in enumerate(sigList):
if len(data) > tmpLength:
maxLength = len(data)
tmpLength = len(data)
index = i
#最大長の音声の長さの0埋め配列を定義
sig = np.zeros(maxLength)
for i in sigList:
tmp = i.tolist() #numpy→list
#全ての音声を最大長の音声に合わせて0埋めする
for data in range(maxLength - len(i)):
tmp.append(0)
tmpArray.append(tmp)
#配列3つを合成する
sig = np.array(tmpArray[0]) + np.array(tmpArray[1]) + np.array(tmpArray[2])
return sig
def min_max(x, axis=None):
min = x.min(axis=axis, keepdims=True)
max = x.max(axis=axis, keepdims=True)
try:
z = (x - min) / (max - min)
except ZeroDivisionError:
z = (x - min) / min
return z
waveform, sample_rate = torchaudio.load(filepath=SAMPLE_WAV_URL)
n_fft = 1024
win_length = None
hop_length = 512
window_fn = torch.hann_window
waveforms = waveform.numpy()
k = waveforms
for i in range(2):
waveforms = np.concatenate([waveforms,k],0)
spectrogram = T.Spectrogram(
n_fft=n_fft,
win_length=win_length,
hop_length=hop_length,
window_fn=window_fn,
power=2.0,
)
sig = min_max(synthesis(waveforms))
spec = spectrogram(sig)
plot_spectrogram(spec[0], title='torchaudio')
spec = spectrogram(sig) This is the line where the error occurs. Detailed error is TypeError
Traceback (most recent call last)
<ipython-input-44-a0a6c4ba7770> in <module>
70 sig = min_max(synthesis(waveforms))
71
---> 72 spec = spectrogram(sig)
73 plot_spectrogram(spec[0], title='torchaudio')
2 frames
/usr/local/lib/python3.7/dist-packages/torchaudio/functional/functional.py in spectrogram(waveform, pad, window, n_fft, hop_length, win_length, power, normalized, center, pad_mode, onesided, return_complex)
106
107# pack batch
--> 108 shape = waveform.size()
109 waveform = waveform.reshape(-1, shape[-1])
110
TypeError: 'int' object is not callable
Upvotes: 0
Views: 863
Reputation: 9152
According to the docs for Torchaudio Spectrogram
, the parameter that's passed to its return value (spectrogram()
in your code) needs to be a PyTorch Tensor
. In your code, you're giving it a Numpy array instead, because that's what your function synthesis()
returns.
You can convert a Numpy ndarray
into a Tensor
with torch.from_numpy
. For example:
spec = spectrogram(torch.from_numpy(sig))
Upvotes: 1