#contents This simple example takes the text of the World English Bible from gutenberg (https://www.gutenberg.org/cache/epub/8294/pg8294.txt), and puts the book names and verses into two tables. A sample of the text is as follows: ```plaintext Book 01 Genesis 001:001 In the beginning God{After "God," the Hebrew has the two letters "Aleph Tav" (the first and last letters of the Hebrew alphabet) as a grammatical marker.} created the heavens and the earth. 001:002 Now the earth was formless and empty. Darkness was on the surface of the deep. God's Spirit was hovering over the surface of the waters. ``` Before we find a line of the form ```plaintext Book nn xyz... ``` we skip everything. Once we see a line of this form, we start a new book. A book consists of a number, a name, and an array of chapters. A chapter is an array of verses. A verse is a string. There are now four types of line we are interested in, and including blank lines that gives us five regular expressions: ```plaintext re_ws = re.compile(r"^\s*$") # -> skip() re_book = re.compile(r"^Book (\d+) (.*)$") # -> new_book() re_new_verse = re.compile(r"^(\d{3}):(\d{3}) (.*)$") # -> new_verse() re_cont_verse = re.compile(r"^ (.*)$") # -> cont_verse() re_end = re.compile(r"^\*\*\* END") # -> finish() ``` The script is as follows: ```plaintext #!/usr/bin/env python3 import mysql.connector import re re_ws = re.compile(r"^\s*$") # -> skip() re_book = re.compile(r"^Book (\d+) (.*)$") # -> new_book() re_new_verse = re.compile(r"^(\d{3}):(\d{3}) (.*)$") # -> new_verse() re_cont_verse = re.compile(r"^ (.*)$") # -> cont_verse() re_end = re.compile(r"^\*\*\* END") # -> finish() started = False books = [] conn = None cursor = None chapter_number = -1 def open_db(): global conn, cursor conn = mysql.connector.connect( host="localhost", user="mrflibble", password="turnip", database="bible" ) cursor = conn.cursor() def close_db(): conn.commit() conn.close() class Finished(Exception): pass def process_line(line): global started if not started: if re_book.match(line): started = True else: return if re_ws.match(line): return elif m := re_book.match(line): numstr,name = m.groups() num = int(numstr) new_book(num,name) elif m := re_new_verse.match(line): ch,vs,text = m.groups() ch = int(ch) vs = int(vs) new_verse(ch,vs,text) elif m := re_cont_verse.match(line): (text,) = m.groups() cont_verse(text) elif re_end.match(line): raise Finished() def new_book(num,name): global chapter_number print(f": {num} {name}") books.append({"number":num,"name":name,"chapters":[]}) chapter_number = 0 def new_verse(chnum,vnum,text): global chapter_number if chnum > chapter_number: books[-1]['chapters'].append([]) chapter_number = chnum books[-1]['chapters'][-1].append(text) def cont_verse(text): books[-1]['chapters'][-1][-1]+=" "+text def process_file(lines): try: for line in lines: process_line(line) except Finished: print("Done") return def process_data(): for book in books: number, name, chapters = book['number'], book['name'], book['chapters'] cursor.execute("INSERT INTO book_names (book_number, book_name) VALUES (%s,%s)",(number,name)) for i,chapter in enumerate(chapters): for j,verse in enumerate(chapter): cursor.execute("INSERT INTO verses (book_number, chapter_number, verse_number, verse_text) VALUES (%s,%s,%s,%s)",(number,i+1,j+1,verse)) print(f"{name} done") def create_tables(): cursor.execute("DROP TABLE IF EXISTS book_names") cursor.execute("DROP TABLE IF EXISTS verses") cursor.execute("CREATE TABLE book_names (book_number integer, book_name varchar(255) )") cursor.execute("CREATE TABLE verses (id integer auto_increment primary key, book_number integer, chapter_number integer, verse_number integer, verse_text text)") def main(): open_db() with open("web_bible.txt") as f: lines = f.read().rstrip().split("\n") print("Read data") process_file(lines) print("Processed lines") create_tables() process_data() close_db() if __name__ == "__main__": main() ``` ### Removing notes Notes are contained in braces {...}. To remove these, we fetch all rows whose `verse_text` contains `{{`, use a regular expression to remove the notes, and update. ```plaintext #!/usr/bin/env python3 import mysql.connector, re conn = mysql.connector.connect( host = "localhost", user = "mrflibble", password = "turnip", database = "bible" ) cursor = conn.cursor() cursor.execute("SELECT id,verse_text FROM verses WHERE verse_text LIKE '%{%'") rows = cursor.fetchall() for row in rows: verse_id, verse_text = row new_verse_text = re.sub(r"\{.*?\}","",verse_text) cursor.execute("UPDATE verses SET verse_text = %s WHERE id = %s",(new_verse_text,verse_id)) print(verse_id) conn.commit() ``` ### Full Text Search Enable when creating ```plaintext CREATE TABLE (id INTEGER AUTO_INCREMENT PRIMARY KEY, text_field TEXT, FULLTEXT KEY (text_field) ); ``` or for a pre-existing table ```plaintext ALTER TABLE verses ADD FULLTEXT(verse_text); ``` The script: ```plaintext #!/usr/bin/env python3 import mysql.connector conn = mysql.connector.connect( host = "localhost", user = "mrflibble", password = "turnip", database = "bible" ) cursor = conn.cursor() cursor.execute("ALTER TABLE verses ADD FULLTEXT(verse_text)") conn.commit() ``` Then search via ```plaintext #!/usr/bin/env python3 import mysql.connector conn = mysql.connector.connect( host = "localhost", user = "john", password = "tree", database = "bible" ) cursor = conn.cursor() cursor.execute( """SELECT book_name,chapter_number,verse_number,verse_text FROM book_names JOIN verses ON book_names.book_number = verses.book_number WHERE MATCH( verse_text ) AGAINST ('Jesus' IN NATURAL LANGUAGE MODE)""") rows = cursor.fetchall() for row in rows: book_name,chapter,verse,text = row print(f"{book_name} {chapter}:{verse} {text}") ```