Dup Ver Goto 📝

Poor Man's Awk

To
75 lines, 317 words, 2300 chars Page 'PoorMansAwk' does not exist.

This is a quick way of writing an Awk-like text processor in Python. Instead of being written in a dedicated language, as in real Awk, actions are Python functions (or any callable) that take the processor, the current line, and the result of the pattern matcher as arguments and do something with them.

I will probably never actually use this, but it was a fun, easy exercise to sit down and write.

# Design notes for the processor, kept as a module-level string.
# Nothing else in this listing reads `idea`.
idea = """
My AWK like thing in python.
Rules are regular expressions, as in the re module.
Actions are python functions, which are given the match object.
There are some constants: BEGIN, END, which are integers.
A None pattern matches everything.
"""

import re
# Sample input for the example run. Read it inside a context manager so the
# file handle is closed right away instead of being left to the garbage
# collector.
with open("som.txt") as infile:
    testin = infile.read()

class Awk:
    """A minimal Awk-like line processor driven by (pattern, action) rules.

    For every input line the rules are tried in the order they were added,
    and an action is invoked as ``action(awk, line, match)`` whenever its
    pattern fires.

    Supported patterns:

    * ``None`` -- fires on every line; ``match`` is ``None``.
    * ``Awk.BEGIN`` -- fires while the first line is being processed;
      ``match`` is ``None``.
    * ``Awk.END`` -- fires while the last line is being processed;
      ``match`` is ``None``.
    * ``str`` -- compiled with ``re.compile`` when the rule is added, then
      treated like a compiled pattern.
    * ``re.Pattern`` -- fires when ``pattern.search(line)`` finds a match;
      ``match`` is the resulting match object.
    * any other callable -- called as ``pattern(line)``; fires when the
      result is truthy, and that result is passed as ``match``.

    BEGIN and END are evaluated as part of the first and last line, in rule
    order. They therefore never fire on empty input, and a BEGIN/END rule
    runs before or after the other rules for that line depending on where
    it sits in the rule list. Patterns of any other kind are ignored.
    """

    BEGIN = 1
    END = 2

    def __init__(self, prog=None):
        """Create a processor, optionally preloaded with rules.

        Args:
            prog: optional iterable of ``(pattern, action)`` pairs; each pair
                is registered through ``add_instr`` in iteration order.
        """
        # Initialised here but not read anywhere in this class.
        self.beginstr = None
        self.endinstr = None
        # Ordered list of (pattern, action) rules.
        self.prog = []
        # Not used by the class itself; presumably scratch state for actions,
        # which receive the processor as their first argument.
        self.vars = {}
        if prog is not None:
            for patt, func in prog:
                self.add_instr(patt, func)

    def add_instr(self, patt, func):
        """Append one rule; string patterns are compiled to regexes first.

        Args:
            patt: the pattern (see the class docstring for accepted kinds).
            func: the action, called as ``func(awk, line, match)``.
        """
        if isinstance(patt, str):
            patt = re.compile(patt)
        self.prog.append((patt, func))

    def run(self, text):
        """Run every rule against each line of *text*.

        Args:
            text: the input; it is split with ``str.splitlines``.
        """
        lines = text.splitlines()
        last_index = len(lines) - 1
        for index, line in enumerate(lines):
            for patt, func in self.prog:
                if patt is None:
                    # A None pattern is a catch-all: fire unconditionally.
                    func(self, line, None)
                    continue
                if patt == Awk.BEGIN and index == 0:
                    func(self, line, None)
                if patt == Awk.END and index == last_index:
                    func(self, line, None)
                if isinstance(patt, re.Pattern):
                    found = patt.search(line)
                    if found:
                        func(self, line, found)
                elif callable(patt):
                    found = patt(line)
                    if found:
                        func(self, line, found)

Example

# Actions are called as action(processor, line, match).
def _on_begin(processor, line, match):
    print("BEGIN")


def _on_jesus(processor, line, match):
    print("Jesus:", line, match)


def _on_said(processor, line, match):
    print("said:", line, match)


def _is_short(text):
    # A callable pattern: more than 10 characters, but fewer than 10
    # space-separated pieces.
    return len(text) > 10 and len(text.split(" ")) < 10


def _on_short(processor, line, match):
    print("SHORT:", line)


def _on_end(processor, line, match):
    print("END")


prog = [
    (Awk.BEGIN, _on_begin),
    # Plain strings are compiled into regular expressions by Awk.
    ("Jesus", _on_jesus),
    ("said", _on_said),
    # The pattern here is a callable rather than a regex.
    (_is_short, _on_short),
    # END goes last so that it runs after every other rule that fires on the
    # final line.
    (Awk.END, _on_end),
]

awk = Awk(prog)
awk.run(testin)

To get case-insensitive patterns, for example, compile the regex yourself and pass the resulting re.Pattern object as the pattern instead of a string, e.g. re.compile("jesus", re.IGNORECASE).