-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathreplace_brackets.py
More file actions
35 lines (25 loc) · 990 Bytes
/
replace_brackets.py
File metadata and controls
35 lines (25 loc) · 990 Bytes
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
def replace_brackets(s):
    """Return *s* with round and square brackets replaced by escape tokens.

    The substitutions are:
        "(" -> "-LRB-"    ")" -> "-RRB-"
        "[" -> "-LSB-"    "]" -> "-RSB-"

    Every other character is returned unchanged. In particular "/" and "*"
    are NOT escaped (an earlier variant that did so is disabled).
    """
    substitutions = (
        ("(", "-LRB-"),
        (")", "-RRB-"),
        ("[", "-LSB-"),
        ("]", "-RSB-"),
    )
    # No token contains a bracket character, so applying the substitutions
    # one after another never rewrites text inserted by an earlier one.
    for bracket, token in substitutions:
        s = s.replace(bracket, token)
    return s
if __name__ == "__main__":
    # Command-line entry point: copy a tab-separated corpus file to a new
    # file, dropping comment lines and escaping brackets in the second column.
    import argparse

    usage = """
Replace ( and ) by -LRB- and -RRB- in a conll corpus
Also: remove commentaries "# ... " in conll corpus
"""
    parser = argparse.ArgumentParser(
        description=usage,
        # Keep the line breaks of the usage text as written.
        formatter_class=argparse.RawTextHelpFormatter,
    )
    parser.add_argument("conll", type=str, help="corpus .conll")
    parser.add_argument("output", type=str, help="Output")
    args = parser.parse_args()

    # Open the input first so that a missing or unreadable corpus does not
    # create or truncate the output file. The `with` statement closes both
    # files even if an error occurs while converting.
    # NOTE(review): passing the same path for input and output would still
    # truncate the corpus before it is read.
    with open(args.conll) as corpus, open(args.output, "w") as out:
        for line in corpus:
            # Lines starting with "#" are comments; they are not copied.
            if line.startswith("#"):
                continue
            fields = line.split("\t")
            # Lines without a tab (e.g. blank separator lines) have a single
            # field and are written back unchanged.
            if len(fields) > 1:
                # Second column - presumably the token form in the CoNLL
                # layout; confirm against the corpus format in use.
                fields[1] = replace_brackets(fields[1])
            out.write("\t".join(fields))