Dup Ver Goto 📝

PythonMySqlBible

PT2/aw/db/sql does not exist
To
223 lines, 707 words, 6175 chars Page 'PythonMySqlBible' does not exist.

contents

This simple example takes the text of the World English Bible from gutenberg (https://www.gutenberg.org/cache/epub/8294/pg8294.txt), and puts the book names and verses into two tables.

A sample of the text is as follows:

Book 01 Genesis
001:001 In the beginning God{After "God," the Hebrew has the two letters
        "Aleph Tav" (the first and last letters of the Hebrew alphabet)
        as a grammatical marker.} created the heavens and the earth.
001:002 Now the earth was formless and empty.  Darkness was on the surface
        of the deep.  God's Spirit was hovering over the surface
        of the waters.

Before we find a line of the form

Book nn xyz...

we skip everything.

Once we see a line of this form, we start a new book. A book consists of a number, a name, and an array of chapters. A chapter is an array of verses. A verse is a string.

There are now four types of line we are interested in, and including blank lines that gives us five regular expressions:

re_ws = re.compile(r"^\s*$")                          # -> skip()
re_book = re.compile(r"^Book (\d+) (.*)$")            # -> new_book()
re_new_verse = re.compile(r"^(\d{3}):(\d{3}) (.*)$")  # -> new_verse()
re_cont_verse = re.compile(r"^        (.*)$")         # -> cont_verse()
re_end = re.compile(r"^\*\*\* END")                   # -> finish()

The script is as follows:

#!/usr/bin/env python3

import mysql.connector
import re

re_ws = re.compile(r"^\s*$") # -> skip()
re_book = re.compile(r"^Book (\d+) (.*)$") # -> new_book()
re_new_verse = re.compile(r"^(\d{3}):(\d{3}) (.*)$") # -> new_verse()
re_cont_verse = re.compile(r"^        (.*)$") # -> cont_verse()
re_end = re.compile(r"^\*\*\* END") # -> finish()

started = False
books = []
conn = None
cursor = None
chapter_number = -1

def open_db():
  global conn, cursor
  conn = mysql.connector.connect(
    host="localhost",
    user="mrflibble",
    password="turnip",
    database="bible"
  )
  cursor = conn.cursor()  

def close_db():
  conn.commit()
  conn.close()

class Finished(Exception):
  pass

def process_line(line):
  global started
  if not started:
    if re_book.match(line):
      started = True
    else:
      return
  if re_ws.match(line):
    return
  elif m := re_book.match(line):
    numstr,name = m.groups()
    num = int(numstr)
    new_book(num,name)    
  elif m := re_new_verse.match(line):
    ch,vs,text = m.groups()
    ch = int(ch)
    vs = int(vs)
    new_verse(ch,vs,text)
  elif m := re_cont_verse.match(line):
    (text,) = m.groups()
    cont_verse(text)
  elif re_end.match(line):
    raise Finished()

def new_book(num,name):
  global chapter_number
  print(f": {num} {name}")
  books.append({"number":num,"name":name,"chapters":[]})
  chapter_number = 0

def new_verse(chnum,vnum,text):
  global chapter_number
  if chnum > chapter_number:
    books[-1]['chapters'].append([])
    chapter_number = chnum
  books[-1]['chapters'][-1].append(text)

def cont_verse(text):
  books[-1]['chapters'][-1][-1]+=" "+text

def process_file(lines):
  try:
    for line in lines:
      process_line(line)
  except Finished:
    print("Done")
    return

def process_data():
  for book in books:
    number, name, chapters = book['number'], book['name'], book['chapters']
    cursor.execute("INSERT INTO book_names (book_number, book_name) VALUES (%s,%s)",(number,name))
    for i,chapter in enumerate(chapters):
      for j,verse in enumerate(chapter):
        cursor.execute("INSERT INTO verses (book_number, chapter_number, verse_number, verse_text) VALUES (%s,%s,%s,%s)",(number,i+1,j+1,verse))
    print(f"{name} done")

def create_tables():
  cursor.execute("DROP TABLE IF EXISTS book_names")
  cursor.execute("DROP TABLE IF EXISTS verses")

  cursor.execute("CREATE TABLE book_names (book_number integer, book_name varchar(255) )")
  cursor.execute("CREATE TABLE verses (id integer auto_increment primary key, book_number integer, chapter_number integer, verse_number integer, verse_text text)")

def main():
  open_db()
  with open("web_bible.txt") as f:
    lines = f.read().rstrip().split("\n")
    print("Read data")
    process_file(lines)
  print("Processed lines")
  create_tables()
  process_data()
  close_db()

if __name__ == "__main__":
  main()

Removing notes

Notes are contained in braces .... To remove these, we fetch all rows whose verse_text contains {{, use a regular expression to remove the notes, and update.

#!/usr/bin/env python3

import mysql.connector, re
conn = mysql.connector.connect(
  host = "localhost",
  user = "mrflibble",
  password = "turnip",
  database = "bible"
)

cursor = conn.cursor()

cursor.execute("SELECT id,verse_text FROM verses WHERE verse_text LIKE '%{%'")
rows = cursor.fetchall()
for row in rows:
    verse_id, verse_text = row
    new_verse_text = re.sub(r"\{.*?\}","",verse_text)
    cursor.execute("UPDATE verses SET verse_text = %s WHERE id = %s",(new_verse_text,verse_id))
    print(verse_id)
conn.commit()

Full Text Search

Enable when creating

CREATE TABLE (id INTEGER AUTO_INCREMENT PRIMARY KEY, text_field TEXT, FULLTEXT KEY (text_field) );

or for a pre-existing table

ALTER TABLE verses ADD FULLTEXT(verse_text);

The script:

#!/usr/bin/env python3

import mysql.connector

conn = mysql.connector.connect(
  host = "localhost",
  user = "mrflibble",
  password = "turnip",
  database = "bible"
)
cursor = conn.cursor()

cursor.execute("ALTER TABLE verses ADD FULLTEXT(verse_text)")
conn.commit()

Then search via

#!/usr/bin/env python3

import mysql.connector
conn = mysql.connector.connect(
  host = "localhost",
  user = "john",
  password = "tree",
  database = "bible"
)

cursor = conn.cursor()

cursor.execute(
"""SELECT book_name,chapter_number,verse_number,verse_text 
     FROM book_names JOIN verses ON book_names.book_number = verses.book_number 
     WHERE MATCH( verse_text ) AGAINST ('Jesus' IN NATURAL LANGUAGE MODE)""")

rows = cursor.fetchall()

for row in rows:
  book_name,chapter,verse,text = row
  print(f"{book_name} {chapter}:{verse} {text}")