Testing existence of files
exists — true if at least one argument exists
#!/usr/bin/perl
# exists: succeed (exit status 0) as soon as one command-line argument names
# something that exists; fail (exit status 1) when none of them does, which
# includes the case of no arguments at all.
foreach my $path (@ARGV) {
    exit 0 if -e $path;
}
exit 1;
allexist — false if at least one argument does not exist
#!/usr/bin/perl
# allexist: fail (exit status 1) at the first command-line argument that does
# not exist; succeed (exit status 0) when every argument exists, which
# includes the case of no arguments at all.
foreach my $path (@ARGV) {
    exit 1 unless -e $path;
}
exit 0;
filterstat — print only those arguments that exist, one per line, i.e. filter(stat, argv[1:])
#!/usr/bin/perl
# filterstat: echo back, one per line and in the order given, every
# command-line argument that names something that exists.
foreach my $path (@ARGV) {
    print "$path\n" if -e $path;
}
Power rename aka file search and replace
filesr — string replace in filenames
#!/usr/bin/env python3
import sys,os
import uuid,os.path
# Command-line and environment handling for filesr.
args = sys.argv[1:]

# OVERWRITE=y (or Y) permits replacing an existing target.
overwrite = os.getenv("OVERWRITE") in ("y", "Y")

# Dry run: renames are skipped when DRYRUN or D is set to anything other
# than "n"/"N" (an empty value counts as set).
forreal = not any(
    os.getenv(var) not in (None, "n", "N") for var in ("DRYRUN", "D")
)

# At least a search string, a replacement and one file are required.
if len(args) < 3:
    print("filesr searchpat replpat <files>")
    sys.exit(1)

s, r, *files = args
print(f"""filesr
search: {s}
replace with: {r}""")
def rn(x, y):
    """Rename ``x`` to ``y``, honouring the dry-run and overwrite settings.

    Reads the module-level flags ``forreal`` and ``overwrite``:

    * when ``forreal`` is false (dry run) nothing is touched;
    * when ``y`` does not exist, ``x`` is simply renamed;
    * when ``y`` exists, ``"<y> exists"`` is printed and ``y`` is replaced
      only if ``overwrite`` is true.

    Args:
        x: Current path.
        y: Desired path.
    """
    if not forreal:
        return
    if not os.path.exists(y):
        os.rename(x, y)
        return
    print(f"{y} exists")
    if not overwrite:
        return
    try:
        # os.replace overwrites an existing file on every platform, whereas
        # os.rename refuses to do so on Windows.
        os.replace(x, y)
    except IsADirectoryError:
        print(f"{y} is a directory")
    else:
        # Report success only once the replacement has actually happened.
        print(f"Overwritten {y}")
# Choose a random name that is not in use in the current directory; it is the
# base name of the intermediate file used for case-only renames.
while True:
    t = str(uuid.uuid4())
    if not os.path.exists(t):
        break
print("Temp name {}".format(t))


def _same_file(a, b):
    """Return True when both paths refer to one filesystem entry."""
    try:
        return os.path.samefile(a, b)
    except OSError:
        return False


# Apply the plain-string replacement to each file name in turn.
for x in files:
    y = x.replace(s, r)
    if x == y:
        continue
    case_only = x.lower() == y.lower()
    # On a case-insensitive filesystem the target of a case-only rename
    # "exists" because it is the source itself; that is not a clash.
    clash = os.path.exists(y) and not (case_only and _same_file(x, y))
    if clash and not overwrite:
        print("{} already exists".format(y))
        continue
    if case_only:
        # Go through a temporary name placed next to the source, so both
        # renames stay on one filesystem.
        folder = os.path.dirname(x)
        tmp = os.path.join(folder, t)
        while os.path.exists(tmp):
            tmp = os.path.join(folder, str(uuid.uuid4()))
        print("Capitalisation issue")
        print(f"Using temp name {tmp}: {x} --> {y}")
        rn(x, tmp)
        rn(tmp, y)
    else:
        print(f"Rename {x} --> {y}")
        rn(x, y)
filesrx — regex replace in filenames
#!/usr/bin/env python3
import sys,os,re
import uuid,os.path
# Command-line and environment handling for filesrx.
args = sys.argv[1:]

# OVERWRITE=y (or Y) permits replacing an existing target;
# VERBOSE=y (or Y) reports files the regex does not match.
overwrite = os.getenv("OVERWRITE") in ("y", "Y")
verbose = os.getenv("VERBOSE") in ("y", "Y")

# Dry run: renames are skipped when DRYRUN or D is set to anything other
# than "n"/"N" (an empty value counts as set).
forreal = not any(
    os.getenv(var) not in (None, "n", "N") for var in ("DRYRUN", "D")
)

# At least a regex, a replacement and one file are required.
if len(args) < 3:
    print("filesrx searchpat replpat <files>")
    print(" searchpat is regex")
    if len(args) == 2:
        print("(did you forget to specify files)")
    sys.exit(1)

s, r = args[:2]
files = args[2:]
print(f"""filesrx
regex: {s}
replace with: {r}""")

# Compile once up front; a malformed pattern is a usage error, not a crash.
try:
    sr = re.compile(s)
except re.error as e:
    print(f"Invalid regex {s}: {e}")
    sys.exit(1)
def rn(x, y):
    """Rename ``x`` to ``y``, honouring the dry-run and overwrite settings.

    Reads the module-level flags ``forreal`` and ``overwrite``:

    * identical source and target are reported as "No change" and skipped;
    * when ``forreal`` is false (dry run) nothing is touched;
    * when ``y`` does not exist, ``x`` is simply renamed;
    * when ``y`` exists, ``"<y> exists"`` is printed and ``y`` is replaced
      only if ``overwrite`` is true.

    Args:
        x: Current path.
        y: Desired path.
    """
    if x == y:
        print(f"No change in {x}")
        return
    if not forreal:
        return
    if not os.path.exists(y):
        os.rename(x, y)
        return
    print(f"{y} exists")
    if not overwrite:
        return
    try:
        # os.replace overwrites an existing file on every platform, whereas
        # os.rename refuses to do so on Windows.
        os.replace(x, y)
    except IsADirectoryError:
        print(f"{y} is a directory")
    else:
        # Report success only once the replacement has actually happened.
        print(f"Overwritten {y}")
# Choose a random name that is not in use in the current directory; it is the
# base name of the intermediate file used for case-only renames.
while True:
    t = str(uuid.uuid4())
    if not os.path.exists(t):
        break
print("Temp name {}".format(t))


def _same_file(a, b):
    """Return True when both paths refer to one filesystem entry."""
    try:
        return os.path.samefile(a, b)
    except OSError:
        return False


# Apply the regex substitution to each file name in turn.
for x in files:
    if verbose and sr.search(x) is None:
        print(f"File {x} does not match {s}")
    try:
        y = sr.sub(r, x)
    except (re.error, IndexError) as e:
        # A bad replacement template (e.g. an unknown group reference) is
        # reported per file and does not stop the run.
        print(f"Regex {s} failed for file {x}")
        print(e)
        continue
    if x == y:
        continue
    case_only = x.lower() == y.lower()
    # On a case-insensitive filesystem the target of a case-only rename
    # "exists" because it is the source itself; that is not a clash.
    clash = os.path.exists(y) and not (case_only and _same_file(x, y))
    if clash and not overwrite:
        print(f"{y} already exists")
        continue
    if case_only:
        # Go through a temporary name placed next to the source, so both
        # renames stay on one filesystem.
        folder = os.path.dirname(x)
        tmp = os.path.join(folder, t)
        while os.path.exists(tmp):
            tmp = os.path.join(folder, str(uuid.uuid4()))
        print("Capitalisation issue")
        print(f"Using temp name {tmp}: {x} --> {y}")
        rn(x, tmp)
        rn(tmp, y)
    else:
        print(f"Rename {x} --> {y}")
        rn(x, y)
Finding stuff
fdup — find duplicate files: candidates are first grouped by size, then narrowed by hashing the first 64k, then the first 1M, and finally the whole file (if desired — hashing the first 64k is usually enough to whittle things down to a set small enough to check by hand)
#!/usr/bin/env python3
import sys, os, os.path
import hashlib
from collections import defaultdict
# Quick mode, switched on by the "-q" command-line argument: duplicates are
# reported from the first-1M hashes instead of hashing every candidate in full.
quick = False
def doprint(*xs, **kw):
    """Print like the built-in ``print`` and flush the target stream at once.

    Accepts exactly the arguments ``print`` accepts. The stream flushed is
    the ``file`` keyword argument when given, otherwise ``sys.stdout``;
    ``file=None`` is treated the way ``print`` treats it, i.e. as
    ``sys.stdout``.
    """
    print(*xs, **kw)
    stream = kw.get("file")
    if stream is None:
        stream = sys.stdout
    stream.flush()
def hash_first(filename, chunk_size=64*1024):
    """Return the SHA-256 hex digest of the first ``chunk_size`` bytes of a file.

    A file shorter than ``chunk_size`` is hashed in full. A progress line is
    written through ``doprint`` before the file is read.

    Args:
        filename: Path of the file to read.
        chunk_size: Maximum number of leading bytes to hash.

    Returns:
        The hexadecimal digest string.
    """
    doprint(f"Hashing first {chunk_size} of {filename}")
    with open(filename, "rb") as f:
        head = f.read(chunk_size)
    return hashlib.sha256(head).hexdigest()
def hash_all(filename, chunk_size=1024*1024):
    """Return the SHA-256 hex digest of an entire file, showing progress.

    The file is read ``chunk_size`` bytes at a time. A dot is written through
    ``doprint`` for every chunk, and after every 30 chunks the percentage of
    the file processed so far is printed.

    Args:
        filename: Path of the file to hash.
        chunk_size: Number of bytes read per iteration.

    Returns:
        The hexadecimal digest string.
    """
    doprint(f"Hashing all of {filename}")
    total = os.path.getsize(filename)
    done = 0
    sha = hashlib.sha256()
    chunks_since_report = 0
    with open(filename, "rb") as f:
        while byte_block := f.read(chunk_size):
            doprint(".", end="")
            sha.update(byte_block)
            done += len(byte_block)
            chunks_since_report += 1
            if chunks_since_report >= 30:
                if total > 0:
                    pc = 100*(done/total)
                    doprint(f" {pc:0.3f}%")
                else:
                    # Some files report a size of 0 yet yield data; a
                    # percentage is meaningless there, so just end the line.
                    doprint()
                chunks_since_report = 0
    doprint()
    return sha.hexdigest()
# Split the command line: "-q" switches on quick mode, every other argument
# is a root directory to scan.
cli_args = sys.argv[1:]
if "-q" in cli_args:
    quick = True
roots = [arg for arg in cli_args if arg != "-q"]
class T:
    """Countdown that ends the program after a fixed number of calls.

    Each call decrements the counter ``t``; once it reaches zero (or below)
    "Exiting" is printed through ``doprint`` and the process exits with
    status 0.
    """

    def __init__(self, t=10):
        # Number of calls left before the program is terminated.
        self.t = t

    def __call__(self):
        remaining = self.t - 1
        self.t = remaining
        if remaining > 0:
            return
        doprint(f"Exiting")
        sys.exit(0)
# t = T(1000)
# pass 1: compile by_size
# Walk every root and group file paths by their size in bytes; only files
# that share a size with another file can be duplicates.
doprint("Finding files")
by_size = defaultdict(list)
for root in roots:
    for rt, _dirs, names in os.walk(root):
        for name in names:
            doprint(".", end="")
            path = os.path.join(rt, name)
            try:
                sz = os.path.getsize(path)
            except OSError as e:
                # Broken symlink, file removed mid-scan, or no permission:
                # skip this entry instead of aborting the whole scan.
                doprint(f"\nSkipping {path}: {e}", file=sys.stderr)
                continue
            by_size[sz].append(path)
doprint("Done finding files")
def _count(groups):
    """Return the total number of paths held in a list of (key, paths) groups."""
    return sum(len(paths) for _key, paths in groups)


def _refine(groups, hasher):
    """Split every group by ``hasher(path)`` and keep sub-groups of 2+ paths.

    Refining inside each existing group (instead of pooling all candidates)
    keeps files of different sizes apart, so equal partial hashes can never
    pair up files whose sizes differ. Files that cannot be read are reported
    on stderr and dropped.

    Returns a list of (digest, paths) tuples.
    """
    refined = []
    for _key, paths in groups:
        by_digest = defaultdict(list)
        for path in paths:
            try:
                digest = hasher(path)
            except OSError as e:
                doprint(f"Skipping {path}: {e}", file=sys.stderr)
                continue
            by_digest[digest].append(path)
        refined.extend(
            (digest, same) for digest, same in by_digest.items() if len(same) > 1
        )
    return refined


def _report(groups):
    """Print each duplicate group as a hash line followed by its paths."""
    if groups:
        doprint(f"Dups:\n=====\n\n")
    for digest, paths in groups:
        doprint(f"hash {digest}:")
        for path in paths:
            doprint(f" {path}")


# Only sizes shared by more than one file can contain duplicates.
groups = [(sz, fs) for sz, fs in by_size.items() if len(fs) > 1]
doprint(f"{_count(groups)} candidates by size")
if not groups:
    sys.exit(0)

# pass 2: compile by_hash64k
groups = _refine(groups, lambda p: hash_first(p, 64*1024))
doprint(f"{_count(groups)} candidates by hash 64k")
if not groups:
    sys.exit(0)

# pass 3: compile by_hash1m
groups = _refine(groups, lambda p: hash_first(p, 1024*1024))
doprint(f"{_count(groups)} candidates by hash 1M")
if not groups:
    sys.exit(0)

if not quick:
    # pass 4: compile by hashall
    groups = _refine(groups, hash_all)
_report(groups)
find_empty_files
#!/bin/bash
# find_empty_files: list zero-size entries below a directory.
#
# Usage: find_empty_files [dir] [extra find expressions...]
#   dir   directory to search; defaults to the current directory when the
#         first argument is missing or empty
#   rest  passed to find unchanged, after "-size 0"
A="${1:-.}"
# Drop the directory argument only when one was actually supplied.
if [ "$#" -gt 0 ]; then
    shift
fi
find "$A" -size 0 "$@"
FS related
fsof — find the type of the filesystem containing a file (there is probably a far more elegant way to do this,
especially if you know Linux's C APIs better than I do)
#!/bin/bash
# fsof: print the filesystem type of the filesystem that holds a file.
#
# Usage: fsof <file on fs>
# Exits with status 1 (message on stderr) when no argument is given or the
# path does not exist.
A="$1"
if [ -e "$A" ]; then
    # -P keeps each filesystem on a single line, so line 2 / field 1 is
    # always the source of the filesystem holding "$A".
    dev="$(df -P -- "$A" | awk 'NR == 2 { print $1 }')"
    if [ -b "$dev" ]; then
        # Ask lsblk for the FSTYPE column only; blank lines (devices without
        # a filesystem signature) are dropped.
        lsblk -n -o FSTYPE "$dev" | awk 'NF'
    else
        # Not backed by a block device (tmpfs, network mounts, ...): report
        # the mounted filesystem type instead.
        findmnt -n -o FSTYPE -T "$A"
    fi
elif [ -z "$A" ]; then
    echo "fsof <file on fs>" >&2
    exit 1
else
    echo "File or directory '$A' does not exist" >&2
    exit 1
fi
guessbd — map a2 to /dev/sda2, sda2 to /dev/sda2, and a file in general to the block device containing its filesystem.
#!/bin/bash
# guessbd: guess which block device the argument refers to.
#
# Tried in order: the argument itself, /dev/<arg>, /dev/sd<arg>, and finally
# the source device of the filesystem holding <arg> when <arg> is an
# existing path. The first candidate that is a block device is printed on
# stdout and the script exits with status 0. Otherwise a message goes to
# stderr and the exit status is 1.

# Print "$1" and finish successfully if it is a block device.
f() {
    if [ -b "$1" ]; then
        echo "$1"
        exit 0
    fi
}

f "$1"
f "/dev/$1"
f "/dev/sd$1"
if [ -e "$1" ]; then
    # -P keeps each filesystem on a single line even for long device names,
    # so line 2 / field 1 is reliably the source device.
    A="$(df -P -- "$1" | awk 'NR == 2 { print $1 }')"
    f "$A"
fi
# Keep the failure message off stdout so $(guessbd ...) never captures it.
echo "Failed to guess block device for '$1'" >&2
exit 1