Automatically making backup copies, using hashing to help generate unique backup filenames.
I don't actually use this particular script; rather, it's an illustration of the bits needed
for this task. Perhaps one would like to recursively search for files. Then perhaps maintain
a dict of last known mtimes rather than computing the hash every time. Quite possibly we don't
need hashing to ensure unique filenames. Anyway, this is just a sketch of the sort of thing you
need, and it's only a few dozen lines, so you can quickly knock out something like this for your
particular use case. Then there are modules that watch the filesystem for activity (e.g. watchdog).
#!/usr/bin/env python
_description = """
A very simple way to autobackup files to unique names.
Not efficient as it will read all files every time it loops.
Quite likely we don't need the hashing at all, just using
the mtime and original filename.
"""
# NOTE(review): icecream is a third-party dependency, used only for error reporting in the main loop.
from icecream import ic; ic.configureOutput(includeContext=True)
import hashlib
from glob import glob
import re
import datetime
import os
import time
# Destination directory for backup copies; created up front so later writes can't fail on a missing dir.
hashbak_dir = "/tmp/a"
os.makedirs(hashbak_dir,exist_ok=True)
def hashbak():
    """Back up every ``*.rpp`` file in the current directory to ``hashbak_dir``.

    The backup name embeds the source file's mtime and the first 8 hex digits
    of its SHA-256 digest, so the same content with the same mtime always maps
    to the same name and is copied at most once (re-runs are idempotent).
    """
    # Case-insensitive match on the .rpp extension.
    allfiles = glob("*")
    rppfiles = [name for name in allfiles if re.search(r"\.rpp$", name, re.I)]
    for rppfile in rppfiles:
        if not os.path.isfile(rppfile):
            continue  # skip directories/links that happen to match
        # Read the file once; the bytes serve both the hash and the copy.
        # (Original leaked the handle via open(...).read() and read twice.)
        with open(rppfile, "rb") as f:
            data = f.read()
        # h8 is the first 8 hex digits of the SHA-256 digest — probably enough.
        h8 = hashlib.sha256(data).hexdigest()[:8]
        # Timestamp component from the file's last-modified time.
        mtime = os.path.getmtime(rppfile)
        dt = datetime.datetime.fromtimestamp(mtime).strftime("%Y_%m_%d_%H_%M_%S")
        # Split off the extension (e.g. ".rpp"), re-appended after the suffix.
        stem, ext = os.path.splitext(rppfile)
        # ofn is the same iff mtime and contents are the same. Two different
        # files getting the same ofn is very unlikely; use more hex digits
        # from the digest to be more certain.
        ofn = f"{hashbak_dir}/{stem}__{dt}__{h8}{ext}"
        # Only copy if the backup file does not already exist.
        if not os.path.exists(ofn):
            with open(ofn, "wb") as f:
                f.write(data)
            print(f"Written {ofn}")
        else:
            print(f"Already exists: {ofn}")
# Sit in a loop looking for files, backing up anything new every 10 seconds.
# Guarded so merely importing this module doesn't start the infinite loop.
if __name__ == "__main__":
    while True:
        try:
            hashbak()
        except Exception as e:
            # Best-effort: report and keep polling rather than dying on a
            # transient error (e.g. a file deleted mid-scan).
            # KeyboardInterrupt is not an Exception, so Ctrl-C still exits.
            ic(e)
        time.sleep(10)  # poll interval in seconds