-
Notifications
You must be signed in to change notification settings - Fork 2
Expand file tree
/
Copy pathnlp.py
More file actions
27 lines (19 loc) · 680 Bytes
/
nlp.py
File metadata and controls
27 lines (19 loc) · 680 Bytes
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
from nltk import tokenize
import re
def getVerses(path, encoding=None):
    """Return the verses found in the text file at *path*.

    A verse starts at a ``chapter:verse`` marker (digits, a colon, digits)
    and runs up to the next blank line or, for the last verse, to the end
    of the text. Line breaks inside a verse are replaced by single spaces.

    Args:
        path: Path of the text file to read.
        encoding: Text encoding passed to ``open``. ``None`` (the default)
            keeps the platform's default encoding.

    Returns:
        A list of verse strings, in file order.
    """
    with open(path, 'r', encoding=encoding) as f:
        kjv = f.read()
    # Accept end-of-text (optionally preceded by one newline) as a verse
    # terminator too; otherwise the final verse is silently dropped when
    # the file does not end with a blank line.
    pattern = re.compile(r"(\d+:\d+.*?)(?:\n\n|\n?\Z)", re.DOTALL)
    return [verse.replace("\n", ' ') for verse in pattern.findall(kjv)]
def getWords(path, encoding=None):
    """Return the whitespace-separated words of the text file at *path*.

    Args:
        path: Path of the text file to read.
        encoding: Text encoding passed to ``open``. ``None`` (the default)
            keeps the platform's default encoding.

    Returns:
        A list of words in file order; an empty list for an empty or
        whitespace-only file.
    """
    with open(path, 'r', encoding=encoding) as f:
        text = f.read()
    # split() with no argument breaks on any run of whitespace. Splitting
    # on ' ' alone fused words separated by a newline into one entry and
    # produced empty "words" for consecutive spaces.
    return text.split()
def getSentences(path, encoding=None):
    """Return the sentences of the text file at *path*.

    The text is split into sentences by nltk's ``tokenize.sent_tokenize``;
    line breaks inside each sentence are then replaced by single spaces.

    Args:
        path: Path of the text file to read.
        encoding: Text encoding passed to ``open``. ``None`` (the default)
            keeps the platform's default encoding.

    Returns:
        A list of sentence strings, in the order the tokenizer yields them.
    """
    with open(path, 'r', encoding=encoding) as f:
        text = f.read()
    return [
        sentence.replace("\n", ' ')
        for sentence in tokenize.sent_tokenize(text)
    ]
def getLines(path, encoding=None):
    """Return the lines of the text file at *path*, without newlines.

    The text is split on ``"\\n"``, so a file that ends with a newline
    yields a final empty string and an empty file yields ``[""]``.

    Args:
        path: Path of the text file to read.
        encoding: Text encoding passed to ``open``. ``None`` (the default)
            keeps the platform's default encoding.

    Returns:
        A list of line strings, in file order.
    """
    with open(path, 'r', encoding=encoding) as f:
        text = f.read()
    # Pieces produced by split("\n") can never contain "\n", so no
    # per-line newline stripping is needed afterwards.
    return text.split("\n")