DetectSilenceExample : /lang/python/numpy : PT2

This is a quick script to detect a run of silence at the start of an audio file. It uses ffmpeg to extract the first 10 minutes as raw 16-bit PCM with a single (mono) channel. This is then turned into a numpy array, which we search for the first nonzero int16 sample. (One could compare against a small epsilon rather than testing for exactly zero.) I wrote this for some DJ podcasts where there were occasionally a few minutes of silence before the podcast proper began; I wanted to run through a large collection of podcasts and automatically detect those with a significant amount of silence at the start.

The Code

#!/usr/bin/env python
import numpy as np
import sys
from subprocess import run

MIN_SILENCE = 5 # seconds of leading silence above which a file is reported
longs = []
def main():
  """Scan every file named on the command line and report long leading silences.

  Each argument is passed to proc(). Files whose leading silence exceeds
  MIN_SILENCE seconds are recorded as "<path>:<seconds>" (three decimals)
  in the module-level list `longs`. The collected lines are printed to
  stdout and also written to "longs.txt" in the current directory.
  """
  for arg in sys.argv[1:]:
    sil = proc(arg)
    # proc() returns None when ffprobe/ffmpeg failed for this file; skip it.
    if sil is not None and sil > MIN_SILENCE:
      # Must be an f-string: a plain literal would record the placeholder
      # text "{arg}:{sil:.03f}" instead of the actual path and duration.
      longs.append(f"{arg}:{sil:.03f}")
  print(o := "\n".join(longs))
  with open("longs.txt","wt") as f:
    print(o,file=f)

MAX_DUR=600 # ten minutes in seconds

def _parse_sample_rate(probe_output):
  """Return the first positive `sample_rate=` value in ffprobe output, else None."""
  for line in probe_output.splitlines():
    key, sep, value = line.strip().partition("=")
    if key != "sample_rate" or not sep:
      continue
    try:
      sr = float(value)
    except ValueError:
      # Unparseable value; keep looking for a usable sample rate.
      continue
    if sr > 0:
      return sr
  return None

def proc(arg):
  """Measure the silence at the start of the audio file `arg`.

  ffprobe is used to read the sample rate, then ffmpeg decodes at most the
  first MAX_DUR seconds to raw mono signed 16-bit little-endian PCM on
  stdout. The PCM is searched for the first sample that is not exactly zero.

  Returns the time in seconds of the first nonzero sample. If every decoded
  sample is zero, returns the duration of the decoded audio. Returns None
  (after printing a message) when ffprobe or ffmpeg exits with an error or
  no usable sample rate is found.
  """
  # get sample rate
  m = run(["ffprobe","-show_streams",arg],capture_output=True)
  if m.returncode != 0:
    print(f"Error ffprobe {arg}")
    return None
  # Stream tags may contain bytes that are not valid UTF-8; only the ASCII
  # "sample_rate=" line matters, so undecodable bytes are replaced.
  sr = _parse_sample_rate(m.stdout.decode(errors="replace"))
  if sr is None:
    print(f"no sr: {arg}")
    return None

  # get raw pcm
  m = run(["ffmpeg","-i",arg,"-t",str(MAX_DUR),"-f","s16le","-acodec","pcm_s16le","-ac","1","-"],capture_output=True)
  if m.returncode != 0:
    print(f"Error ffmpeg {arg}")
    return None
  pcm = m.stdout
  # Each sample is 2 bytes; ignore a dangling trailing byte so frombuffer
  # does not raise on truncated output. memoryview avoids copying the data.
  usable = len(pcm) - len(pcm) % 2
  # The dtype must match ffmpeg's s16le output. numpy's default (float64)
  # would fuse four samples into one element and skew every timestamp.
  a = np.frombuffer(memoryview(pcm)[:usable], dtype="<i2")
  l = a.shape[0]

  # search for non silence (vectorized; a per-sample Python loop is very
  # slow on ten minutes of audio)
  nonzero = a != 0
  if nonzero.any():
    i = int(nonzero.argmax()) # argmax on booleans gives the first True
    t = i/sr
    print(f"{arg}: {i} == {t}")
    return t
  print(f"{arg}: silence")
  return l/sr

# Run the batch scan only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
  main()