This is a quick way of writing an Awk-like text processor in Python. Instead of being written in a dedicated language, as in real Awk, actions are Python functions (or any other callable) that take the processor, the current line, and the result of the pattern matcher as arguments and do something with them.
I will probably never actually use this, but it was a fun, easy exercise to sit down and write.
# Design notes for the Awk-like processor, kept as a module-level string.
# It describes the intended rule/action model; nothing in the code shown
# here reads this variable.
idea = """
My AWK like thing in python.
Rules are regular expressions, as in the re module.
Actions are python functions, which are given the match object.
There are some constants: BEGIN, END, which are integers.
A None pattern matches everything.
"""
import re
# Sample input for the example program, read once at import time.
# The context manager closes the file handle as soon as the text is read
# instead of leaving it open until the garbage collector reclaims it.
with open("som.txt") as _sample_file:
    testin = _sample_file.read()
class Awk:
    """A minimal Awk-like line processor driven by (pattern, action) rules.

    A pattern may be one of:

    * ``Awk.BEGIN`` -- the action runs while the first line is processed.
    * ``Awk.END`` -- the action runs while the last line is processed.
    * ``None`` -- the action runs for every line.
    * a ``str`` -- compiled with ``re.compile`` and searched in each line.
    * a compiled ``re.Pattern`` -- searched in each line.
    * any other callable -- called with the line; a truthy result is a match.

    Actions are callables invoked as ``action(awk, line, match)``.  ``match``
    is the ``re.Match`` object for regex patterns, the truthy return value
    for callable patterns, and ``None`` for ``BEGIN``, ``END`` and ``None``
    patterns.

    Rules are evaluated in the order they were added, once per line.  That
    is why a ``BEGIN``/``END`` rule fires relative to the other rules on the
    first/last line according to its position in the program.  If the input
    has no lines, no action runs at all (including ``BEGIN`` and ``END``).
    """

    # Sentinel patterns; plain integers so they can be compared with ``==``.
    BEGIN = 1
    END = 2

    def __init__(self, prog=None):
        """Create a processor, optionally preloaded with rules.

        Args:
            prog: Optional iterable of ``(pattern, action)`` pairs; each pair
                is registered through ``add_instr`` in iteration order.
        """
        # Not used by this class; kept so existing attribute access on
        # instances keeps working.
        self.beginstr = None
        self.endinstr = None
        # Ordered list of (pattern, action) rules.
        self.prog = []
        # Not read or written by the class itself; available on the
        # instance that every action receives as its first argument.
        self.vars = {}
        if prog is not None:
            for patt, func in prog:
                self.add_instr(patt, func)

    def add_instr(self, patt, func):
        """Append one rule to the program.

        Args:
            patt: The pattern (see the class docstring).  Strings are
                compiled to ``re.Pattern`` here, once, so ``run`` does not
                recompile them for every line.
            func: The action, called as ``func(awk, line, match)``.

        Raises:
            re.error: If ``patt`` is a string that is not a valid regex.
        """
        if isinstance(patt, str):
            patt = re.compile(patt)
        self.prog.append((patt, func))

    def run(self, text):
        """Run every rule against every line of ``text``.

        Args:
            text: The input; split into lines with ``str.splitlines``, so
                line terminators are not passed to patterns or actions.
        """
        lines = text.splitlines()
        last_index = len(lines) - 1
        for i, line in enumerate(lines):
            for patt, func in self.prog:
                if patt is None:
                    # A None pattern matches everything.
                    func(self, line, None)
                    continue
                if patt == Awk.BEGIN and i == 0:
                    func(self, line, None)
                if patt == Awk.END and i == last_index:
                    func(self, line, None)
                if isinstance(patt, re.Pattern):
                    m = patt.search(line)
                    if m:
                        func(self, line, m)
                elif callable(patt):
                    m = patt(line)
                    if m:
                        func(self, line, m)
Example usage:
# Example program: each action below has the (processor, line, match)
# signature that Awk uses when it invokes an action.
def _report_begin(a, line, match):
    print("BEGIN")


def _report_jesus(a, line, match):
    print("Jesus:", line, match)


def _report_said(a, line, match):
    print("said:", line, match)


def _is_short(t):
    # More than 10 characters but fewer than 10 space-separated fields.
    return len(t) > 10 and len(t.split(" ")) < 10


def _report_short(a, line, match):
    print("SHORT:", line)


def _report_end(a, line, match):
    print("END")


prog = [
    (Awk.BEGIN, _report_begin),
    # String patterns: Awk compiles these into regular expressions.
    ("Jesus", _report_jesus),
    ("said", _report_said),
    # A callable pattern: its truthy result counts as a match.
    (_is_short, _report_short),
    # END goes last so it fires after every other rule that matches the
    # final line.
    (Awk.END, _report_end),
]
awk = Awk(prog)
awk.run(testin)
To use regex flags (case-insensitive matching, for example), compile the regex yourself and pass the re.Pattern object
as the pattern to match, rather than the string, e.g. re.compile("said", re.IGNORECASE).