This is a quick script to detect a run of silence at the start of an audio file.
It uses ffmpeg to extract the first 10 minutes as raw 16-bit PCM with a single channel.
This is then turned into a numpy array, through which we search for the first nonzero int16.
(One could check with a small epsilon, rather than testing for identically zero.)
I wrote this for some DJ podcasts where occasionally there were a few minutes of silence
before the podcast proper began, and I wanted to run through a large collection of podcasts
and automate the task of detecting those with a significant amount of silence at the start.
The Code
#!/usr/bin/env python
import numpy as np
import sys
from subprocess import run
# Report entries of the form "<path>:<seconds>" for files whose leading
# silence exceeds the threshold; filled in by main().
longs = []

SILENCE_THRESHOLD = 5  # seconds of leading silence considered "significant"


def main():
    """Scan every file named on the command line and report long silences.

    Each argument is passed to ``proc``. Files for which it returns more
    than ``SILENCE_THRESHOLD`` seconds are collected in ``longs``. The
    collected entries are printed to stdout and also written to
    ``longs.txt`` in the current directory.
    """
    for arg in sys.argv[1:]:
        sil = proc(arg)
        # proc returns None when the file could not be analysed.
        if sil is not None and sil > SILENCE_THRESHOLD:
            # Must be an f-string: a plain string would store the literal
            # text "{arg}:{sil:.03f}" instead of the path and duration.
            longs.append(f"{arg}:{sil:.3f}")

    report = "\n".join(longs)
    print(report)
    with open("longs.txt", "wt") as f:
        print(report, file=f)
MAX_DUR = 600  # ten minutes in seconds


def leading_silence_samples(pcm, eps=0):
    """Count the silent samples at the start of raw s16le PCM data.

    Args:
        pcm: Bytes of signed 16-bit little-endian single-channel PCM.
        eps: Largest absolute sample value still treated as silence;
            0 means only exact zeros count as silent.

    Returns:
        A ``(count, total)`` tuple: the number of leading samples whose
        magnitude is at most ``eps``, and the number of whole samples in
        ``pcm``. ``count == total`` means no sound was found at all.
    """
    total = len(pcm) // 2
    if total == 0:
        return 0, 0
    # Slicing a memoryview drops a dangling odd byte without copying.
    # The dtype must be given explicitly: frombuffer defaults to float64,
    # which would misread the 16-bit samples.
    samples = np.frombuffer(memoryview(pcm)[: total * 2], dtype="<i2")
    # Two comparisons instead of abs(): abs(-32768) overflows in int16.
    loud = (samples > eps) | (samples < -eps)
    first = int(loud.argmax())
    if not loud[first]:
        # argmax returns 0 when nothing is True, so check the hit is real.
        return total, total
    return first, total


def proc(arg, eps=0):
    """Measure the silence at the start of an audio file, in seconds.

    Runs ``ffprobe`` to read the sample rate, then ``ffmpeg`` to decode at
    most the first ``MAX_DUR`` seconds to mono s16le PCM on stdout, and
    searches that data for the first non-silent sample.

    Args:
        arg: Path of the audio file to inspect.
        eps: Largest absolute sample value still treated as silence
            (default 0: only exact zeros are silent).

    Returns:
        Seconds of leading silence as a float. If the decoded excerpt is
        silent throughout, its full duration is returned. Returns ``None``
        when ffprobe or ffmpeg fails or no usable sample rate is reported.
    """
    # get sample rate
    probe = run(["ffprobe", "-show_streams", arg], capture_output=True)
    if probe.returncode != 0:
        print(f"Error ffprobe {arg}")
        return None
    # Metadata tags in the output may not be valid UTF-8; never crash on them.
    text = probe.stdout.decode(errors="replace")
    rate_lines = [
        line for line in text.splitlines()
        if line.strip().startswith("sample_rate=")
    ]
    if not rate_lines:
        print(f"no sr: {arg}")
        return None
    try:
        sr = float(rate_lines[0].split("=", 1)[1])
    except ValueError:
        sr = 0.0
    if sr <= 0:
        # An unparsable or zero rate cannot be turned into a time offset.
        print(f"no sr: {arg}")
        return None

    # get raw pcm
    decode = run(
        ["ffmpeg", "-i", arg, "-t", str(MAX_DUR), "-f", "s16le",
         "-acodec", "pcm_s16le", "-ac", "1", "-"],
        capture_output=True,
    )
    if decode.returncode != 0:
        print(f"Error ffmpeg {arg}")
        return None

    # search for non silence
    silent, total = leading_silence_samples(decode.stdout, eps)
    t = silent / sr
    if silent == total:
        print(f"{arg}: silence")
    else:
        print(f"{arg}: {silent} == {t}")
    return t
# Script entry point: run the scan only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()