如何在Python中播放和录制音频？

Python 几乎可以做任何能想到的事情，播放和录制音频也不例外。本文将介绍一些 Python 库，以及使用这些库在 Python 中播放和录制声音的简单方法；其中一些库需要多写几行 Python 代码，但可以换来更多功能。

大多数音频文件采用 MP3 或 WAV 格式。WAV 是最简单的数字音频格式，以无损方式、较高的码率存储录音，因此与其他格式相比，WAV 文件较大。正因如此，人们更常使用体积较小的 MP3 格式，它在压缩文件的同时对整体音质几乎没有影响。此外，使用互联网上广泛可得的开源免费软件，可以很容易地将 WAV 转换为 MP3。

播放音频

下面提到的是一些Python库，您可以使用它们在Python中播放各种音频格式，包括 MP3 格式、WAV 格式，甚至 NumPy 数组。

方法 1：使用 Playsound

这是一个开箱即用的软件包，只需一行代码即可播放音频文件，可以用它播放 WAV 或 MP3 文件。它是一个只包含单个函数的模块，播放声音不需要其他依赖项。

playsound 库的文档提到它已经针对 WAV 和 MP3 文件进行了测试，但也可能适用于其他文件格式，这些格式是否可用需由用户自行测试。playsound 模块只包含一样东西——同样名为 playsound 的函数。

以下是播放文件的代码行：

Python3

# Import the playsound function from the playsound package
from playsound import playsound

# Location of the audio file to play
audio_path = 'full_path/filename.mp3'

# Play the file
playsound(audio_path)

Python3

# simpleaudio plus its bundled installation self-checks
import simpleaudio as sa
import simpleaudio.functionchecks as checks

# Run every function check in sequence to confirm the installation works
checks.run_all()

# WAV file to play
wav_path = 'myfile.wav'

# Build a WaveObject straight from the WAV file on disk
wav = sa.WaveObject.from_wave_file(wav_path)

# Kick off playback
playback = wav.play()

# Block until the whole clip has played, then stop
playback.wait_done()
playback.stop()

Python3

import numpy as np
# The package is named simpleaudio ("simplesound" does not provide play_buffer)
import simpleaudio as sa

# Note frequencies: a 400 Hz base note and the note seven semitones above it
first_freq = 400
nxt_freq = first_freq * 2 ** (7 / 12)

# Samples per second
smpl_rate = 44100
# Duration of each note in seconds
seconds = 3

# Generate the time steps: seconds * smpl_rate evenly spaced values
# covering [0, seconds), endpoint excluded
arr = np.linspace(0, seconds, seconds * smpl_rate, False)

# Generate a sine wave for each note
first_note = np.sin(first_freq * arr * 2 * np.pi)
nxt_note = np.sin(nxt_freq * arr * 2 * np.pi)

# Concatenate the notes so they play one after the other
tape = np.hstack((first_note, nxt_note))

# Normalize the concatenated notes to the signed 16-bit range
tape *= 32767 / np.max(np.abs(tape))

# Convert to 16-bit integer samples
tape = tape.astype(np.int16)

# Start playback: 1 channel, 2 bytes per sample, smpl_rate samples per second
play = sa.play_buffer(tape, 1, 2, smpl_rate)

# Wait for audio playback to finish before exiting
play.wait_done()
play.stop()

Python3

# Import winsound (standard library, Windows only)
import winsound

# Path to the WAV file to play (placeholder: replace with a real file)
path_to_file = 'myfile.wav'

# SND_FILENAME tells PlaySound that the first argument is a file name
winsound.PlaySound(path_to_file, winsound.SND_FILENAME)

Python3

# winsound is the built-in Windows sound module
import winsound

# Tone parameters: pitch in hertz, length in milliseconds
beep_hz = 5000
beep_ms = 1000

# Sound the tone
winsound.Beep(beep_hz, beep_ms)

# Then play the sound registered under the "SystemExit" alias
winsound.PlaySound("SystemExit", winsound.SND_ALIAS)

Python3

# Import libraries
import sounddevice as sd
import soundfile as sf

# Path to the audio file to play (placeholder: replace with a real file)
path_of_file = 'myfile.wav'

# Extract the sample data and the sampling rate from the file
array, smp_rt = sf.read(path_of_file, dtype='float32')

# Start the playback
sd.play(array, smp_rt)

# Wait until the file is done playing
status = sd.wait()

# Stop the sound
sd.stop()

Python3

from pydub import AudioSegment
from pydub.playback import play

# First way to load: generic loader with an explicit format
generic_load = AudioSegment.from_file('path_to_myfile.wav', format='wav')

# Second way to load: the WAV-specific convenience constructor
wav_load = AudioSegment.from_wav('path_to_myfile.wav')

# Play the most recently loaded segment
play(wav_load)

Python3

"""Play a WAVE file."""
import pyaudio
import wave

filename = 'path-to_file.wav'

# Number of frames read from the file and written to the stream per step
chunk = 1024

# Open the sound file; the with-block closes it when playback is over
with wave.open(filename, 'rb') as af:
    # Create an interface to PortAudio
    pa = pyaudio.PyAudio()
    try:
        # Open an output stream matching the file's sample width,
        # channel count and frame rate. 'output=True' means the
        # stream is used for playback rather than recording.
        stream = pa.open(format=pa.get_format_from_width(af.getsampwidth()),
                         channels=af.getnchannels(),
                         rate=af.getframerate(),
                         output=True)
        try:
            # readframes() returns bytes and yields b'' at end of file,
            # so test truthiness: comparing against the str '' is never
            # equal on Python 3 and would loop forever.
            rd_data = af.readframes(chunk)
            while rd_data:
                stream.write(rd_data)
                rd_data = af.readframes(chunk)
        finally:
            # Stop and close the stream even if playback fails
            stream.stop_stream()
            stream.close()
    finally:
        # Release the PortAudio interface
        pa.terminate()

Python3

# Import required libraries
import sounddevice as sd
from scipy.io.wavfile import write
import wavio as wv

# Sampling frequency in Hz. One name is used throughout: the original
# defined `frequency` but then referenced an undefined `freq`.
freq = 44100

# Recording duration in seconds
duration = 3.5

# Record audio from the sound device into a NumPy array
# (number of frames = duration * sampling frequency)
recording = sd.rec(int(duration * freq),
                   samplerate=freq, channels=2)

# Wait for the recording to complete
sd.wait()

# Use scipy to save the recording in .wav format: this converts the
# NumPy array to an audio file with the given sampling frequency
write("recording0.wav", freq, recording)

# Use wavio to save the recording in .wav format: this converts the
# NumPy array to an audio file with the given sampling frequency
wv.write("recording1.wav", recording, freq, sampwidth=2)

Python3

import sounddevice as sd

# Sampling frequency in Hz and recording length in seconds
# (both were referenced but never defined in the original snippet)
fs = 44100
duration = 3.5

# Set module-wide defaults so they need not be passed to every call
sd.default.samplerate = fs
sd.default.channels = 2

# Record using the defaults configured above
myrecording = sd.rec(int(duration * fs))

# To change the data type of the recorded array, pass dtype to rec(), e.g.
# myrecording = sd.rec(int(duration * fs), dtype='float64')

# Wait for the recording to complete
sd.wait()

Python3

import sounddevice as sd
import numpy as np

# Samples per second, also used as the number of samples to generate
sample_rate = 44100

# Uniform random samples in [-1, 1) to use as the playback signal
noise = np.random.uniform(low=-1, high=1, size=sample_rate)

# Play the signal and record two input channels at the same time
captured = sd.playrec(noise, samplerate=sample_rate, channels=2)

# Block until playback and recording have finished
sd.wait()

Python3

import pyaudio
import wave

# Record in chunks of 1024 samples
chunk = 1024

# 16 bits per sample
sample_format = pyaudio.paInt16
channels = 2

# Record at 44100 samples per second
smpl_rt = 44100
# Recording length in seconds
seconds = 4
filename = "path_of_file.wav"

# Create an interface to PortAudio
pa = pyaudio.PyAudio()
try:
    # Look up the sample width before the interface is terminated,
    # so the value does not rely on a terminated PyAudio instance
    sample_width = pa.get_sample_size(sample_format)

    # 'input=True' opens the stream for recording
    stream = pa.open(format=sample_format, channels=channels,
                     rate=smpl_rt, input=True,
                     frames_per_buffer=chunk)

    print('Recording...')

    try:
        # Read enough chunks to cover `seconds` seconds of audio
        frames = [stream.read(chunk)
                  for _ in range(int(smpl_rt / chunk * seconds))]
    finally:
        # Stop and close the stream even if reading fails
        stream.stop_stream()
        stream.close()
finally:
    # Terminate the PortAudio interface
    pa.terminate()

print('Done !!! ')

# Save the recorded data in .wav format; the with-block closes the file
with wave.open(filename, 'wb') as wf:
    wf.setnchannels(channels)
    wf.setsampwidth(sample_width)
    wf.setframerate(smpl_rt)
    wf.writeframes(b''.join(frames))

输出：

方法2：使用Simpleaudio。

示例 1：

它是一个跨平台的Python库，用于播放单声道和立体声 WAV 文件，没有其他音频播放依赖项。 macOS、Windows 和 Linux 正式支持Python 3.7 及更高版本。

以下是播放 .wav 格式文件的简单代码，尽管与上述库相比，它消耗了更多的代码行：

Python3

# simpleaudio plus its bundled installation self-checks
import simpleaudio as sa
import simpleaudio.functionchecks as checks

# Run every function check in sequence to confirm the installation works
checks.run_all()

# WAV file to play
wav_path = 'myfile.wav'

# Build a WaveObject straight from the WAV file on disk
wav = sa.WaveObject.from_wave_file(wav_path)

# Kick off playback
playback = wav.play()

# Block until the whole clip has played, then stop
playback.wait_done()
playback.stop()

输出：

示例 2：

simpleaudio 可以通过 simpleaudio.play_buffer() 播放 NumPy 数组、Python 数组和字节对象。NumPy 数组可用于存储音频，但有一些关键要求：如果要存储立体声音频，则该数组必须有两列，每列包含一个声道的音频数据；数组还必须是带符号的 16 位整数 dtype，因此样本幅度值必须介于 -32768 到 32767 之间。以下代码生成一个 NumPy 数组并使用 simpleaudio.play_buffer() 播放它。

Python3

import numpy as np
# The package is named simpleaudio ("simplesound" does not provide play_buffer)
import simpleaudio as sa

# Note frequencies: a 400 Hz base note and the note seven semitones above it
first_freq = 400
nxt_freq = first_freq * 2 ** (7 / 12)

# Samples per second
smpl_rate = 44100
# Duration of each note in seconds
seconds = 3

# Generate the time steps: seconds * smpl_rate evenly spaced values
# covering [0, seconds), endpoint excluded
arr = np.linspace(0, seconds, seconds * smpl_rate, False)

# Generate a sine wave for each note
first_note = np.sin(first_freq * arr * 2 * np.pi)
nxt_note = np.sin(nxt_freq * arr * 2 * np.pi)

# Concatenate the notes so they play one after the other
tape = np.hstack((first_note, nxt_note))

# Normalize the concatenated notes to the signed 16-bit range
tape *= 32767 / np.max(np.abs(tape))

# Convert to 16-bit integer samples
tape = tape.astype(np.int16)

# Start playback: 1 channel, 2 bytes per sample, smpl_rate samples per second
play = sa.play_buffer(tape, 1, 2, smpl_rate)

# Wait for audio playback to finish before exiting
play.wait_done()
play.stop()

输出：

方法三：使用winsound。

示例 1：

它是访问基本声音播放机制的内置模块。它只允许您播放 WAV 文件（它不支持任何其他文件格式）或使您的扬声器发出哔哔声，但它只能在 Windows 上运行，顾名思义就是 WINsound。它是内置模块，因此无需额外安装。

Python3

# Import winsound (standard library, Windows only)
import winsound

# Path to the WAV file to play (placeholder: replace with a real file)
path_to_file = 'myfile.wav'

# SND_FILENAME tells PlaySound that the first argument is a file name
winsound.PlaySound(path_to_file, winsound.SND_FILENAME)

输出：

示例 2：

它还可用于使扬声器发出哔哔声或播放 Windows 默认声音。在下面的代码中，先播放 1000 毫秒的 5000Hz 哔哔声，随后播放 Windows 退出声音。

Python3

# winsound is the built-in Windows sound module
import winsound

# Tone parameters: pitch in hertz, length in milliseconds
beep_hz = 5000
beep_ms = 1000

# Sound the tone
winsound.Beep(beep_hz, beep_ms)

# Then play the sound registered under the "SystemExit" alias
winsound.PlaySound("SystemExit", winsound.SND_ALIAS)

输出：

该库的主要缺点是它仅适用于 Windows 操作系统，并且不支持播放 WAV 以外的任何其他文件格式。

方法4：使用sounddevice。

这个Python模块为 PortAudio 库提供绑定和一些方便的函数来播放和记录包含音频信号的 NumPy 数组。它适用于 Linux、macOS 和 Windows 操作系统。

在下面的代码中，包含“ sf.read() ”的一行会读取全部原始音频数据以及存储在文件 RIFF 标头中的采样率；而“ sd.wait() ”确保脚本在“ sd.play(array, smp_rt) ”播放完音频之后才结束。

Python3

# Import libraries
import sounddevice as sd
import soundfile as sf

# Path to the audio file to play (placeholder: replace with a real file)
path_of_file = 'myfile.wav'

# Extract the sample data and the sampling rate from the file
array, smp_rt = sf.read(path_of_file, dtype='float32')

# Start the playback
sd.play(array, smp_rt)

# Wait until the file is done playing
status = sd.wait()

# Stop the sound
sd.stop()

输出：

方法五：使用pydub。

尽管 pydub 可以轻松打开和保存 WAV 文件而无需任何其他依赖项，但必须至少预装一个来自（simpleaudio、pyaudio、ffplay 和 avplay）的音频播放包。它为音频操作提供了纯Python实现。

以下代码导入两个库，第一个库用于加载文件，第二个库用于播放加载的文件。此外，还提供了两种加载 .wav 文件的方法。

Python3

from pydub import AudioSegment
from pydub.playback import play

# First way to load: generic loader with an explicit format
generic_load = AudioSegment.from_file('path_to_myfile.wav', format='wav')

# Second way to load: the WAV-specific convenience constructor
wav_load = AudioSegment.from_wav('path_to_myfile.wav')

# Play the most recently loaded segment
play(wav_load)

输出：

方法六：使用pyaudio。

PyAudio 是另一个用于 Python 的跨平台音频库。虽然它比 simpleaudio 库具有更多的功能，例如录制和连续音频流，但它在很大程度上依赖于 PortAudio，这导致安装更加复杂。与 python-sounddevice 一样，它也为跨平台音频 I/O 库 PortAudio 提供 Python 绑定。借助 PyAudio，您可以轻松地使用 Python 在各种平台上播放和录制音频。

Python3

"""Play a WAVE file."""
import pyaudio
import wave

filename = 'path-to_file.wav'

# Number of frames read from the file and written to the stream per step
chunk = 1024

# Open the sound file; the with-block closes it when playback is over
with wave.open(filename, 'rb') as af:
    # Create an interface to PortAudio
    pa = pyaudio.PyAudio()
    try:
        # Open an output stream matching the file's sample width,
        # channel count and frame rate. 'output=True' means the
        # stream is used for playback rather than recording.
        stream = pa.open(format=pa.get_format_from_width(af.getsampwidth()),
                         channels=af.getnchannels(),
                         rate=af.getframerate(),
                         output=True)
        try:
            # readframes() returns bytes and yields b'' at end of file,
            # so test truthiness: comparing against the str '' is never
            # equal on Python 3 and would loop forever.
            rd_data = af.readframes(chunk)
            while rd_data:
                stream.write(rd_data)
                rd_data = af.readframes(chunk)
        finally:
            # Stop and close the stream even if playback fails
            stream.stop_stream()
            stream.close()
    finally:
        # Release the PortAudio interface
        pa.terminate()

输出：

录制音频

现在切换到本文的录制部分。上面提到的库中有几个同时支持播放和录音，本可以放在一起讲解，但涉及的库较多时容易混淆，因此这里单独用一个部分来介绍录音。

注意 - 在使用任何库进行录音之前，请确保您的设备的麦克风已实际连接并且已打开且未静音。可以使用操作系统功能和设置进行检查。

方法一、使用python-sounddevice

该库允许您播放（如上所述）和记录包含音频信号信息的 NumPy 数组。此模块需要 scipy 或 wavio 来保存录制的音频，这意味着在使用此包进行录制之前，应与 Numpy 一起预安装 scipy 或 wavio 库。

Python3

# Import required libraries
import sounddevice as sd
from scipy.io.wavfile import write
import wavio as wv

# Sampling frequency in Hz. One name is used throughout: the original
# defined `frequency` but then referenced an undefined `freq`.
freq = 44100

# Recording duration in seconds
duration = 3.5

# Record audio from the sound device into a NumPy array
# (number of frames = duration * sampling frequency)
recording = sd.rec(int(duration * freq),
                   samplerate=freq, channels=2)

# Wait for the recording to complete
sd.wait()

# Use scipy to save the recording in .wav format: this converts the
# NumPy array to an audio file with the given sampling frequency
write("recording0.wav", freq, recording)

# Use wavio to save the recording in .wav format: this converts the
# NumPy array to an audio file with the given sampling frequency
wv.write("recording1.wav", recording, freq, sampwidth=2)

输出：

如音频部分所述，sounddevice 提供了默认设置选项，可用来指定需要重复使用的声道数和采样率，之后就无需再将它们作为参数传递给 sd.rec() 方法。以下代码演示了这一点，并说明如何将录音数组的数据类型从默认的 float32 更改为其他类型。

Python3

import sounddevice as sd

# Sampling frequency in Hz and recording length in seconds
# (both were referenced but never defined in the original snippet)
fs = 44100
duration = 3.5

# Set module-wide defaults so they need not be passed to every call
sd.default.samplerate = fs
sd.default.channels = 2

# Record using the defaults configured above
myrecording = sd.rec(int(duration * fs))

# To change the data type of the recorded array, pass dtype to rec(), e.g.
# myrecording = sd.rec(int(duration * fs), dtype='float64')

# Wait for the recording to complete
sd.wait()

同时播放和录音

播放名为 my_arr 的数组，并同时进行录音。这里的采样率是 smpl_rate。

Python3

import sounddevice as sd
import numpy as np

# Samples per second, also used as the number of samples to generate
sample_rate = 44100

# Uniform random samples in [-1, 1) to use as the playback signal
noise = np.random.uniform(low=-1, high=1, size=sample_rate)

# Play the signal and record two input channels at the same time
captured = sd.playrec(noise, samplerate=sample_rate, channels=2)

# Block until playback and recording have finished
sd.wait()

方法二：使用pyaudio。

如上所述，使用 pyaudio 播放音频是通过向 pyaudio.Stream 写入数据实现的；要录制音频，则需要从以 input=True 打开的流中读取数据。以下是录制几秒钟的音频并将其保存到 .wav 文件的代码：

Python3

import pyaudio
import wave

# Record in chunks of 1024 samples
chunk = 1024

# 16 bits per sample
sample_format = pyaudio.paInt16
channels = 2

# Record at 44100 samples per second
smpl_rt = 44100
# Recording length in seconds
seconds = 4
filename = "path_of_file.wav"

# Create an interface to PortAudio
pa = pyaudio.PyAudio()
try:
    # Look up the sample width before the interface is terminated,
    # so the value does not rely on a terminated PyAudio instance
    sample_width = pa.get_sample_size(sample_format)

    # 'input=True' opens the stream for recording
    stream = pa.open(format=sample_format, channels=channels,
                     rate=smpl_rt, input=True,
                     frames_per_buffer=chunk)

    print('Recording...')

    try:
        # Read enough chunks to cover `seconds` seconds of audio
        frames = [stream.read(chunk)
                  for _ in range(int(smpl_rt / chunk * seconds))]
    finally:
        # Stop and close the stream even if reading fails
        stream.stop_stream()
        stream.close()
finally:
    # Terminate the PortAudio interface
    pa.terminate()

print('Done !!! ')

# Save the recorded data in .wav format; the with-block closes the file
with wave.open(filename, 'wb') as wf:
    wf.setnchannels(channels)
    wf.setsampwidth(sample_width)
    wf.setframerate(smpl_rt)
    wf.writeframes(b''.join(frames))

输出：