home : /linux/my-custom-commands : PT2

Testing existence of files

exists — true if at least one argument exists

#!/usr/bin/perl
# exists: exit 0 as soon as one argument names an existing path;
# exit 1 when none does (including when no arguments are given).
foreach my $path (@ARGV) {
    exit 0 if -e $path;
}
exit 1;

allexist — false if at least one argument does not exist

#!/usr/bin/perl
# allexist: exit 1 as soon as one argument does not name an existing path;
# exit 0 when every argument exists (including when no arguments are given).
foreach my $path (@ARGV) {
    exit 1 unless -e $path;
}
exit 0;

filterstat — print only those arguments that exist, one per line, i.e. filter(stat,argv[1:])

#!/usr/bin/perl
# filterstat: echo back, one per line, each argument that names an existing path.
foreach my $path (@ARGV) {
    print "$path\n" if -e $path;
}

Power rename aka file search and replace

filesr — string replace in filenames

#!/usr/bin/env python3

import sys,os
import uuid,os.path

# Command line: filesr <search> <replacement> <file>...
args = sys.argv[1:]

# OVERWRITE=y (or Y) allows an existing target to be replaced.
overwrite = os.getenv("OVERWRITE") in ("y", "Y")

# DRYRUN (or its short form D) set to anything other than n/N turns the run
# into a dry run: renames are reported but not performed.
forreal = not any(
    os.getenv(name) not in (None, "n", "N") for name in ("DRYRUN", "D")
)

if len(args) < 3:
    print("filesr searchpat replpat <files>")
    sys.exit(1)

s, r = args[0], args[1]
files = args[2:]
print(f"""filesr
      search: {s}
replace with: {r}""")

def rn(x,y):
    """Rename ``x`` to ``y``, honouring the dry-run and overwrite settings.

    Nothing happens when the module-level ``forreal`` flag is false (dry
    run). An existing ``y`` is replaced only when the module-level
    ``overwrite`` flag is true; otherwise the rename is skipped and ``x`` is
    left untouched. A failed overwrite is reported on stdout instead of
    aborting the whole run.
    """
    if not forreal:
        return
    if not os.path.exists(y):
        os.rename(x, y)
        return
    print(f"{y} exists")
    if not overwrite:
        return
    try:
        os.rename(x, y)
    except IsADirectoryError:
        print(f"{y} is a directory")
    except OSError as e:
        # e.g. target is a non-empty directory, or a directory onto a file
        print(f"Could not overwrite {y}: {e}")
    else:
        # Only claim success once the rename has actually happened.
        print(f"Overwritten {y}")

# Pick a scratch name that is unused in the current directory; it is the
# intermediate step for renames that only change letter case.
while True:
    t = str(uuid.uuid4())
    if not os.path.exists(t):
        break
print("Temp name {}".format(t))

for x in files:
    y = x.replace(s, r)
    if x == y:
        continue
    case_only = x.lower() == y.lower()
    target_exists = os.path.exists(y)
    # On a case-insensitive filesystem a case-only target "exists" merely
    # because it is the source under another spelling; that is not a conflict.
    is_self = (case_only and target_exists and os.path.exists(x)
               and os.path.samefile(x, y))
    if target_exists and not is_self and not overwrite:
        # A genuine conflict; rn() performs the replacement when OVERWRITE
        # is set, so only skip here when it is not.
        print("{} already exists".format(y))
    elif case_only and (is_self or not target_exists):
        # Go through a scratch name placed next to the source, so the
        # two-step rename never crosses a filesystem boundary.
        tmp = os.path.join(os.path.dirname(x), t)
        print("Capitalisation issue")
        print(f"Using temp name {tmp}: {x} --> {y}")
        rn(x, tmp)
        rn(tmp, y)
    else:
        print(f"Rename {x} --> {y}")
        rn(x, y)

filesrx — regex replace in filenames

#!/usr/bin/env python3

import sys,os,re
import uuid,os.path

# Command line: filesrx <regex> <replacement> <file>...
args = sys.argv[1:]

# OVERWRITE=y allows replacing existing targets; VERBOSE=y reports files
# that the regex does not match.
overwrite = False
verbose = False
if os.getenv("OVERWRITE") in ["y","Y"]:
  overwrite = True
if os.getenv("VERBOSE") in ["y","Y"]:
  verbose = True

# DRYRUN (or D) set to anything other than n/N means: report, don't rename.
forreal = True
if os.getenv("DRYRUN") is not None and os.getenv("DRYRUN") not in ["n","N"]:
    forreal = False
elif os.getenv("D") is not None and os.getenv("D") not in ["n","N"]:
    forreal = False

if len(args) < 3:
    print("filesrx searchpat replpat <files>")
    print("  searchpat is regex")
    if len(args) == 2:
      print("(did you forget to specify files)")
    sys.exit(1)

s,r = tuple(args[:2])
files = args[2:]
print(f"""filesrx
       regex: {s}
replace with: {r}""")
try:
    sr = re.compile(s)
except re.error as e:
    # A malformed pattern is a usage error, not a reason for a traceback.
    print(f"Invalid regex {s}: {e}")
    sys.exit(1)

def rn(x,y):
    """Rename ``x`` to ``y``, honouring the dry-run and overwrite settings.

    Identical names are reported as "No change" and skipped. Nothing is
    renamed when the module-level ``forreal`` flag is false (dry run). An
    existing ``y`` is replaced only when the module-level ``overwrite`` flag
    is true; otherwise the rename is skipped and ``x`` is left untouched. A
    failed overwrite is reported on stdout instead of aborting the whole run.
    """
    if x == y:
        print(f"No change in {x}")
        return
    if not forreal:
        return
    if not os.path.exists(y):
        os.rename(x, y)
        return
    print(f"{y} exists")
    if not overwrite:
        return
    try:
        os.rename(x, y)
    except IsADirectoryError:
        print(f"{y} is a directory")
    except OSError as e:
        # e.g. target is a non-empty directory, or a directory onto a file
        print(f"Could not overwrite {y}: {e}")
    else:
        # Only claim success once the rename has actually happened.
        print(f"Overwritten {y}")

# Pick a scratch name that is unused in the current directory; it is the
# intermediate step for renames that only change letter case.
while True:
    t = str(uuid.uuid4())
    if not os.path.exists(t):
        break
print("Temp name {}".format(t))

for x in files:
    if verbose and sr.search(x) is None:
        print(f"File {x} does not match {s}")
    try:
        y = sr.sub(r, x)
    except (re.error, IndexError) as e:
        # A bad replacement template (invalid escape or unknown group
        # reference) only shows up when the substitution is attempted.
        print(f"Regex {s} failed for file {x}")
        print(e)
        continue
    if x == y:
        continue
    case_only = x.lower() == y.lower()
    target_exists = os.path.exists(y)
    # On a case-insensitive filesystem a case-only target "exists" merely
    # because it is the source under another spelling; that is not a conflict.
    is_self = (case_only and target_exists and os.path.exists(x)
               and os.path.samefile(x, y))
    if target_exists and not is_self and not overwrite:
        # A genuine conflict; rn() performs the replacement when OVERWRITE
        # is set, so only skip here when it is not.
        print(f"{y} already exists")
    elif case_only and (is_self or not target_exists):
        # Go through a scratch name placed next to the source, so the
        # two-step rename never crosses a filesystem boundary.
        tmp = os.path.join(os.path.dirname(x), t)
        print("Capitalisation issue")
        print(f"Using temp name {tmp}: {x} --> {y}")
        rn(x, tmp)
        rn(tmp, y)
    else:
        print(f"Rename {x} --> {y}")
        rn(x, y)

Finding stuff

fdup — find duplicate files: first find candidates by size, then by hashing the first 64k, then the first 1M, then the whole file (if desired – usually hashing the first 64k is enough to whittle things down to a small enough set to check by hand)

#!/usr/bin/env python3
import sys, os, os.path
import hashlib
from collections import defaultdict

# Set to True by the -q command-line flag: report the groups found by the
# 1M-prefix hash instead of going on to hash every candidate in full.
quick = False

def doprint(*xs,**kw):
  """Print like ``print`` and flush the destination stream straight away.

  Takes the same arguments as ``print``. The stream that gets flushed is
  the ``file`` keyword argument when given, otherwise ``sys.stdout``.
  """
  stream = kw.get("file", sys.stdout)
  print(*xs,**kw)
  stream.flush()

def hash_first(filename,chunk_size=64*1024):
  """Return the SHA-256 hex digest of the first ``chunk_size`` bytes of a file.

  A progress line naming the file is printed first. Files shorter than
  ``chunk_size`` are hashed in their entirety.
  """
  doprint(f"Hashing first {chunk_size} of {filename}")
  with open(filename,"rb") as fh:
    return hashlib.sha256(fh.read(chunk_size)).hexdigest()

def hash_all(filename,chunk_size=1024*1024):
  """Return the SHA-256 hex digest of the whole file, printing progress.

  The file is read in ``chunk_size`` pieces. One dot is printed per piece
  and, after every 30 pieces, the percentage of the file read so far.
  """
  doprint(f"Hashing all of {filename}")
  sz = os.path.getsize(filename)
  done = 0
  chunks = 0
  sha = hashlib.sha256()
  with open(filename,"rb") as f:
    while byte_block := f.read(chunk_size):
      doprint(".",end="")
      sha.update(byte_block)
      done += len(byte_block)
      chunks += 1
      if chunks >= 30:
        if sz > 0:
          pc = 100*(done/sz)
          doprint(f" {pc:0.3f}%")
        else:
          # Reported size is 0 although data is being read (e.g. a procfs
          # entry), so a percentage cannot be computed.
          doprint(f" {done} bytes")
        chunks = 0
  doprint()
  return sha.hexdigest()


# Every command-line argument is a directory to scan, except the flag -q,
# which switches on quick mode.
roots = [arg for arg in sys.argv[1:] if arg != "-q"]
if "-q" in sys.argv[1:]:
  quick = True

class T:
  """Call counter that ends the program when it runs out.

  Each call lowers the counter by one; the call that brings it to zero or
  below prints "Exiting" and exits with status 0.
  """
  def __init__(self,t=10):
    self.t = t
  def __call__(self):
    self.t = self.t - 1
    if self.t > 0:
      return
    doprint("Exiting")
    sys.exit(0)

# t = T(1000)

# pass 1: compile by_size
# Walk every root and group file paths by size; only files that share their
# size with at least one other file can be duplicates.
doprint("Finding files")
by_size = defaultdict(list)
for root in roots:
  for rt, dirs, files in os.walk(root):
    for f in files:
      doprint(".",end="")
      path = os.path.join(rt,f)
      try:
        sz = os.path.getsize(path)
      except OSError as e:
        # e.g. a dangling symlink or a file removed during the scan; one
        # bad entry should not abort the whole run
        doprint(f"\nSkipping {path}: {e}")
        continue
      by_size[sz].append(path)
doprint("Done finding files")

candidates = []
for fs in by_size.values():
  if len(fs) > 1:
    candidates += fs
doprint(f"{len(candidates)} candidates by size")
if not candidates:
  sys.exit(0)

# pass 2: compile by_hash64k
# Group the size candidates by the hash of their first 64k and keep only
# the groups with more than one member.
by_hash64k = defaultdict(list)
for c in candidates:
  try:
    h = hash_first(c,64*1024)
  except OSError as e:
    # unreadable, or gone since it was listed: it cannot be compared
    doprint(f"Skipping {c}: {e}")
    continue
  by_hash64k[h].append(c)
candidates = []
for fs in by_hash64k.values():
  if len(fs) > 1:
    candidates += fs
doprint(f"{len(candidates)} candidates by hash 64k")
if not candidates:
  sys.exit(0)

# pass 3: compile by_hash1m
# Group the remaining candidates by the hash of their first 1M and keep
# only the groups with more than one member.
by_hash1m = defaultdict(list)
for c in candidates:
  try:
    h = hash_first(c,1024*1024)
  except OSError as e:
    # unreadable, or gone since it was listed: it cannot be compared
    doprint(f"Skipping {c}: {e}")
    continue
  by_hash1m[h].append(c)
candidates = []
for fs in by_hash1m.values():
  if len(fs) > 1:
    candidates += fs
doprint(f"{len(candidates)} candidates by hash 1M")
if not candidates:
  sys.exit(0)

def _report_dups(groups):
  """Print the duplicate sets found in ``groups`` (hash -> list of paths).

  Within each hash group the files are additionally split by size, because
  a prefix hash alone can match files of different lengths; only sets of
  two or more same-sized files are printed. The "Dups" heading is printed
  once, before the first set.
  """
  dups = False
  for h,fs in groups.items():
    same_size = defaultdict(list)
    for f in fs:
      try:
        same_size[os.path.getsize(f)].append(f)
      except OSError as e:
        doprint(f"Skipping {f}: {e}")
    for members in same_size.values():
      if len(members) > 1:
        if not dups:
          dups = True
          doprint(f"Dups:\n=====\n\n")
        doprint(f"hash {h}:")
        for f in members:
          doprint(f"  {f}")

if quick:
  # quick mode: trust the 1M-prefix hash (plus equal size)
  _report_dups(by_hash1m)
else:
  # pass 4: compile by hashall
  by_hashall = defaultdict(list)
  for c in candidates:
    try:
      h = hash_all(c)
    except OSError as e:
      # unreadable, or gone since it was listed: it cannot be compared
      doprint(f"Skipping {c}: {e}")
      continue
    by_hashall[h].append(c)
  _report_dups(by_hashall)

find_empty_files

#!/bin/bash
# find_empty_files <dir> [find expressions...]
# Lists the zero-size entries under <dir>; any further arguments are appended
# to the find command line.
if [ "$#" -lt 1 ]; then
  # Without a directory the script would run `find ""`, which only errors.
  echo "find_empty_files <dir> [find expressions]" >&2
  exit 1
fi
A="$1"
shift
find "$A" -size 0 "$@"

FS related

fsof — find the filesystem containing a file (there is probably a far more elegant way to do this, especially if you know Linux's C apis better than I do)

#!/bin/bash
# fsof <file> — print the filesystem type of the block device that holds <file>.
# Errors go to stderr and give a non-zero exit status.
A="$1"
if [ -z "$A" ]; then
  echo "fsof <file on fs>" >&2
  exit 1
fi
if [ ! -e "$A" ]; then
  echo "File or directory '$A' does not exist" >&2
  exit 1
fi
# -P keeps each filesystem on a single line; accept only real /dev sources so
# that lsblk is never run without a device (which would list every device).
dev="$(df -P -- "$A" | awk 'NR > 1 && $1 ~ /^\/dev\// { print $1 }')"
if [ -z "$dev" ]; then
  echo "'$A' is not on a /dev block device" >&2
  exit 1
fi
# Ask for the FSTYPE column by name instead of picking it by position.
lsblk -n -o FSTYPE "$dev"

guessbd — map a2 to /dev/sda2, sda2 to /dev/sda2, and a file in general to the block device containing its filesystem.

#!/bin/bash
# guessbd <name> — print the block device that <name> most plausibly refers to.
# Tried in order: <name> itself, /dev/<name>, /dev/sd<name>, and finally the
# device backing the filesystem that contains the file <name>.
# Prints the device and exits 0 on success; exits 1 with a message on stderr
# otherwise.

# Print "$1" and finish the script successfully if it is a block device.
f() {
    if [ -b "$1" ]; then
        echo "$1"
        exit 0
    fi
}
f "$1"
f "/dev/$1"
f "/dev/sd$1"
if [ -e "$1" ]; then
    # -P keeps each filesystem on one line even when the device name is long,
    # so the last line always starts with the device.
    A="$(df -P "$1" | tail -n1 | cut -f1 -d\ )"
    f "$A"
fi
# stderr, so that $(guessbd ...) never captures the error text as a device
echo "Failed to guess block device for '$1'" >&2
exit 1