-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathmain.py
More file actions
33 lines (24 loc) · 963 Bytes
/
main.py
File metadata and controls
33 lines (24 loc) · 963 Bytes
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
import re
import sqlite3
from contextlib import closing
from pathlib import Path

from bs4 import BeautifulSoup
# Directory that receives one "<song number>.txt" file per song.
OUTPUT_DIR = Path("Tenzi")

# A verse marker is a run of digits followed by a period (e.g. "12."),
# optionally preceded by a newline and surrounded by whitespace.  The capture
# group makes re.split keep the marker itself in the result list.
VERSE_MARKER = re.compile(r'\n?\s*(\d+\.)\s*')


def split_verses(clean_text):
    """Split plain-text lyrics into verses keyed by their verse marker.

    Args:
        clean_text: Lyrics as plain text, with verses introduced by markers
            such as ``"1."``.

    Returns:
        A dict mapping each marker string (including the trailing period) to
        the stripped text that follows it, in order of first appearance.
        Text before the first marker is discarded, and if a marker occurs
        more than once the last occurrence's text is kept.
    """
    parts = VERSE_MARKER.split(clean_text)
    # With one capture group the split result alternates
    # [lead, marker, text, marker, text, ...], so markers sit at the odd
    # indices and each marker's text at the following even index.
    return {num: text.strip() for num, text in zip(parts[1::2], parts[2::2])}


def render_song(song_number, swahili_title, english_title, verses):
    """Build the full text written to a song's output file.

    Args:
        song_number: Value written after ``Song Number:``.
        swahili_title: Value written after ``Swahili Title:``.
        english_title: Value written after ``English Title:``.
        verses: Mapping of verse marker to verse text, as returned by
            ``split_verses``.

    Returns:
        The header lines followed by one ``Verse <marker>`` section per
        entry, each terminated by a ``------`` separator.
    """
    pieces = [
        f"Song Number: {song_number}\n",
        f"Swahili Title: {swahili_title}\n",
        f"English Title: {english_title}\n\n",
    ]
    for number, verse in verses.items():
        pieces.append(f"Verse {number}\n")
        pieces.append(verse)
        pieces.append("\n\n------\n\n")
    return "".join(pieces)


def main():
    """Export every row of the SONG table to ``Tenzi/<song number>.txt``."""
    # sqlite3's own context manager only commits or rolls back; closing()
    # is what actually releases the connection once the rows are fetched.
    with closing(sqlite3.connect("nyimbodb.db")) as con:
        songs = con.execute("SELECT song_Number, song_TitleSwahili, song_TitleEnglish, song_Lyrics FROM SONG").fetchall()

    # Create the output directory up front so the first open() cannot fail
    # with FileNotFoundError on a fresh checkout.
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)

    for song in songs:
        # A NULL lyrics column arrives as None, which BeautifulSoup cannot
        # parse; treat it as empty lyrics so the header is still written.
        unparsed_lyrics = BeautifulSoup(song[3] or "", 'html.parser')
        clean_text = unparsed_lyrics.get_text(separator="\n")
        content = render_song(song[0], song[1], song[2], split_verses(clean_text))
        # Parse first, write last: a failure above no longer leaves a
        # truncated file behind.
        with open(OUTPUT_DIR / f"{song[0]}.txt", "w", encoding="utf-8") as f:
            f.write(content)


if __name__ == "__main__":
    main()