A mess. Really needs to be rewritten to use argparse.
But if I want to generate a bulleted list of links for this wiki,
given a list of URLs, this does the job.
#!/usr/bin/env python
import json
import os
import re
import sys
from html import escape

import requests
# A line counts as a URL only if it starts with an http:// or https:// scheme.
urlre = re.compile(r"^https?://")
# First <title> element of a page; line breaks are flattened before matching.
titlere = re.compile(r"<title[^>]*>(.*?)</title>", re.I)
# Seconds to wait on a server before giving up on one URL.
REQUEST_TIMEOUT = 30

HTML_SWITCHES = ("--html", "-h")
MARKDOWN_SWITCHES = ("--markdown", "-m", "--md", "--ptmd", "-p")
NUMERIC_SWITCHES = ("--numeric", "-n")


def print_help(progname):
    """Print the usage summary for the script to stdout."""
    print(f"{progname}")
    print(" -m --md --markdown --ptmd -p: markdown output")
    print(" -h --html: html output (overridden by markdown)")
    print(" -n --numeric: numbered lists (else bulleted)")


def parse_args(args, progname=""):
    """Split command-line arguments into output flags and input sources.

    ``args`` is the argument list without the program name. A bare ``-``
    and anything not starting with ``-`` is an input source (``-`` meaning
    stdin). ``--help`` prints the usage text and exits with status 0.
    Unknown switches are reported on stdout and otherwise ignored.

    Returns a tuple ``(as_html, as_markdown, numeric, sources)``. Markdown
    wins over HTML when both are requested, and ``sources`` defaults to
    ``["-"]`` when no source was named.
    """
    as_html = False
    as_markdown = False
    numeric = False
    sources = []
    for arg in args:
        if arg == "-" or not arg.startswith("-"):
            sources.append(arg)
        elif arg in HTML_SWITCHES:
            as_html = True
        elif arg in MARKDOWN_SWITCHES:
            as_markdown = True
        elif arg in NUMERIC_SWITCHES:
            numeric = True
        elif arg == "--help":
            print_help(progname)
            sys.exit(0)
        else:
            print("Unrecognised switch", arg)
    if as_markdown:
        as_html = False
    if not sources:
        sources = ["-"]
    return as_html, as_markdown, numeric, sources


def read_lines(sources):
    """Return the non-blank lines of every source, stripped of whitespace.

    Each source is a file name, or ``-`` for stdin. Stdin is read only once;
    naming ``-`` again repeats the same text. A file that cannot be read is
    reported on stdout and skipped.
    """
    stdin_text = None
    chunks = []
    for source in sources:
        if source == "-":
            if stdin_text is None:
                stdin_text = sys.stdin.read()
            chunks.append(stdin_text)
            continue
        try:
            with open(source) as f:
                chunks.append(f.read().rstrip())
        except (OSError, UnicodeError) as e:
            print(f"Exception {e} {type(e)} reading {source}")
    # Stripping removes indentation and the "\r" left by CRLF files, either
    # of which would otherwise make a valid URL fail the scheme check or
    # leak into the request.
    stripped = (line.strip() for line in "\n".join(chunks).split("\n"))
    return [line for line in stripped if line]


def extract_title(page, trim=""):
    """Return the text of the first ``<title>`` element in ``page``.

    ``trim`` is a regular expression; when non-empty, a match anchored at the
    end of the raw title is removed before surrounding whitespace is stripped.
    Returns ``None`` when the page has no ``<title>`` element.
    """
    flat = page.replace("\r", "").replace("\n", " ")
    found = titlere.search(flat)
    if not found:
        return None
    title = found.group(1)
    if trim:
        title = re.sub(f"{trim}$", "", title)
    return title.strip()


def format_entry(url, title, idx, as_html=False, as_markdown=False,
                 numeric=False) -> str:
    """Format one link as a markdown item, an HTML ``<li>``, or plain text.

    ``idx`` is the 1-based item number, used only for numbered markdown.
    ``title`` is inserted as found in the page. In HTML output the URL is
    escaped so that quotes or ampersands cannot break the single-quoted
    ``href`` attribute.
    """
    if as_markdown:
        marker = f"{idx}." if numeric else "*"
        return f"{marker} [{title}]({url})"
    if as_html:
        return f"<li><a href='{escape(url, quote=True)}'>{title}</a></li>"
    return f"{url} {title}"


def main(argv=None):
    """Fetch each URL named in the inputs and print a list of titled links.

    ``argv`` defaults to ``sys.argv``. The TRIM environment variable, when
    set, is passed to ``extract_title`` as the suffix pattern to remove.
    Lines that are not URLs, URLs that cannot be fetched and pages without a
    title are reported on stdout and skipped.
    """
    argv = sys.argv if argv is None else argv
    as_html, as_markdown, numeric, sources = parse_args(argv[1:], argv[0])
    trim = os.getenv("TRIM", "")
    list_tag = "ol" if numeric else "ul"

    urls = read_lines(sources)
    if as_html:
        print(f"<{list_tag}>")
    idx = 0
    for url in urls:
        if not urlre.match(url):
            print(f"Not url: {url}")
            continue
        try:
            response = requests.get(url, timeout=REQUEST_TIMEOUT)
        except requests.RequestException as e:
            # One unreachable site should not abort the rest of the list.
            print(f"Exception {e} {type(e)} fetching {url}")
            continue
        title = extract_title(response.text, trim)
        if title is None:
            print(f"No title for {url}")
            continue
        idx += 1  # only successfully titled links are numbered
        print(format_entry(url, title, idx, as_html, as_markdown, numeric))
    if as_html:
        print(f"</{list_tag}>")


if __name__ == "__main__":
    main()