# 【python】《多媒体技术与应用》实验报告「数字音频处理」

《多媒体技术与应用》实验报告

2022/4/25

1.掌握数字音频的读取与打开；

2.掌握数字音频信号的频谱分析；

3.验证 PCM 编码算法。

PCM 的实现主要包括三个步骤完成：抽样、量化、编码。分别完成时间上离散、幅度上离散、及量化信号的二进制表示。根据 CCITT 的建议，为改善小信号量化性能，采用压扩非均匀量化，有两种建议方式，分别为 A 律和 m 律方式，本设计采用了 A 律方式。

1. 打开音频信号文件，获取音高、音量等信息；

2. 对打开的文件进行频谱分析；

3. 验证 PCM 算法。

4-1 音量

``````import math
import wave
import pylab as pl
import numpy as np

# method 1: absSum
def calVolume(waveData, frameSize, overLap):
wlen = len(waveData)
step = frameSize - overLap
frameNum = int(math.ceil(wlen * 1.0 / step))
curFrame = waveData[np.arange(i * step, min(i * step + frameSize, wlen))]
curFrame = curFrame - np.median(curFrame)  # zero-justified
volume[i] = np.sum(np.abs(curFrame))
return volume

# method 2: 10 times log10 of square sum
def calVolumeDB(waveData, frameSize, overLap):
wlen = len(waveData)
step = frameSize - overLap
frameNum = int(math.ceil(wlen * 1.0 / step))
curFrame = waveData[np.arange(i * step, min(i * step + frameSize, wlen))]
curFrame = curFrame - np.mean(curFrame)  # zero-justified
volume[i] = 10 * np.log10(np.sum(curFrame * curFrame))
return volume

# ============ test the algorithm =============
# read wave file and get parameters.
fw = wave.open('sunday.wav', 'r')
params = fw.getparams()
print(params)
nchannels, sampwidth, framerate, nframes = params[:4]
waveData = np.frombuffer(strData, dtype=np.int16)
waveData = waveData * 1.0 / max(abs(waveData))  # normalization
fw.close()

# calculate volume
frameSize = 256
overLap = 128
volume11 = calVolume(waveData, frameSize, overLap)
volume12 = calVolumeDB(waveData, frameSize, overLap)

# plot the wave
time = np.arange(0, nframes) * (1.0 / framerate)
time2 = np.arange(0, len(volume11)) * (frameSize - overLap) * 1.0 / framerate
pl.figure(figsize=(15, 9))
pl.subplot(311)
pl.plot(time, waveData)
pl.ylabel("Amplitude")
pl.subplot(312)
pl.plot(time2, volume11)
pl.ylabel("absSum")
pl.subplot(313)
pl.plot(time2, volume12, c="g")
pl.ylabel("Decibel(dB)")
pl.xlabel("time (seconds)")
pl.show()
``````

4-2 音高

``````import wave
import numpy as np
import pylab as pl

# ============ test the algorithm =============
# read wave file and get parameters.
fw = wave.open('Ring08.wav', 'rb')
params = fw.getparams()
print(params)
nchannels, sampwidth, framerate, nframes = params[:4]
# waveData = np.fromstring(strData, dtype=np.int16)
waveData = np.frombuffer(strData, np.int16)
waveData = waveData * 1.0 / max(abs(waveData))  # normalization
fw.close()

# plot the wave
time = np.arange(0, len(waveData)) * (1.0 / framerate)

index1 = 10000.0 / framerate
index2 = 10512.0 / framerate
index3 = 15000.0 / framerate
index4 = 15512.0 / framerate

pl.subplot(311)
pl.plot(time, waveData)
pl.plot([index1, index1], [-1, 1], 'r')
pl.plot([index2, index2], [-1, 1], 'r')
pl.plot([index3, index3], [-1, 1], 'g')
pl.plot([index4, index4], [-1, 1], 'g')
pl.xlabel("time (seconds)")
pl.ylabel("Amplitude")

pl.subplot(312)
pl.plot(np.arange(512), waveData[10000:10512], 'r')
pl.plot([59, 59], [-1, 1], 'b')
pl.plot([169, 169], [-1, 1], 'b')
print(1 / ((169 - 59) * 1.0 / framerate))
pl.xlabel("index in 1 frame")
pl.ylabel("Amplitude")

pl.subplot(313)
pl.plot(np.arange(512), waveData[15000:15512], 'g')
pl.xlabel("index in 1 frame")
pl.ylabel("Amplitude")
pl.show()
``````

4-3 波形频谱

``````import librosa
import librosa.display
import matplotlib.pyplot as plt

# extract mel spectrogram feature
melspec = librosa.feature.melspectrogram(y, sr, n_fft=1024, hop_length=512, n_mels=128)
# convert to log scale
logmelspec = librosa.power_to_db(melspec)
plt.figure(figsize=(15, 10))
# plot a wavform
plt.subplot(2, 1, 1)
librosa.display.waveshow(y, sr)
plt.title('Beat wavform')
# plot mel spectrogram
plt.subplot(2, 1, 2)
librosa.display.specshow(logmelspec, sr=sr, x_axis='time', y_axis='mel')
plt.title('Mel spectrogram')
plt.tight_layout()  # 保证图不重叠
plt.show()
``````

4-4 PCM

``````import numpy as np
import librosa
import matplotlib.pyplot as plt

def PCM_encode(x):
n = len(x)
out = np.zeros((n, 8))
for i in range(n):
# 符号位
if x[i] > 0:
out[i, 0] = 1
else:
out[i, 0] = 0
# 数据位
if abs(x[i]) < 32:
out[i, 1], out[i, 2], out[i, 3], step, st = 0, 0, 0, 2, 0
elif abs(x[i]) < 64:
out[i, 1], out[i, 2], out[i, 3], step, st = 0, 0, 1, 2, 32
elif abs(x[i]) < 128:
out[i, 1], out[i, 2], out[i, 3], step, st = 0, 1, 0, 4, 64
elif abs(x[i]) < 256:
out[i, 1], out[i, 2], out[i, 3], step, st = 0, 1, 1, 8, 128
elif abs(x[i]) < 512:
out[i, 1], out[i, 2], out[i, 3], step, st = 1, 0, 0, 16, 256
elif abs(x[i]) < 1024:
out[i, 1], out[i, 2], out[i, 3], step, st = 1, 0, 1, 32, 512
elif abs(x[i]) < 2048:
out[i, 1], out[i, 2], out[i, 3], step, st = 1, 1, 0, 64, 1024
else:
out[i, 1], out[i, 2], out[i, 3], step, st = 1, 1, 1, 128, 2048

if abs(x[i]) >= 4096:
out[i, 1:] = np.array([1, 1, 1, 1, 1, 1])
else:
tmp = bin(int((abs(x[i]) - st) / step)).replace('0b', '')
tmp = '0' * (4 - len(tmp)) + tmp
t = [int(k) for k in tmp]
out[i, 4:] = t
return out.reshape(8 * n)

def PCM_decode(ins, v):
inn = ins.reshape(len(ins) // 8, 8)
slot = np.array([0, 32, 64, 128, 256, 512, 1024, 2048])
step = np.array([2, 2, 4, 8, 16, 32, 64, 128])
out = np.zeros(len(ins) // 8)
for i in range(inn.shape[0]):
sgn = 2 * inn[i, 0] - 1
tmp = int(inn[i, 1] * 4 + inn[i, 2] * 2 + inn[i, 3])
st = slot[tmp]
dt = (inn[i, 4] * 8 + inn[i, 5] * 4 + inn[i, 6] * 2 + inn[i, 7]) * step[tmp] + 0.5 * step[tmp]
out[i] = sgn * (st + dt) / 4096 * v
return out

plt.rcParams['font.sans-serif'] = ['SimHei']
plt.rcParams['axes.unicode_minus'] = False

sxx = np.array(list(map(int, data * 4096)))

y = PCM_encode(sxx)

yy = PCM_decode(y, 1)

plt.subplot(3, 1, 1)
plt.plot(data)
plt.title('编码前')

plt.subplot(3, 1, 2)
plt.plot(yy)
plt.title('解码后')

plt.subplot(3, 1, 3)
plt.plot(yy - data)
plt.title('误差')
plt.show()
``````

 4-1 音量 aeiou.wav sunday.wav 结果
 4-2 音高 aeiou.wav sunday.wav Ring08.wav 结果

 4-3 波形频谱 aeiou.wav sunday.wav PCM.wav 结果
 4-4 PCM PCM.wav sunday.wav Ring08.wav 结果