-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathDetokenize.py
More file actions
83 lines (67 loc) · 2.49 KB
/
Detokenize.py
File metadata and controls
83 lines (67 loc) · 2.49 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
import sys
import re
import os
def main(argv):
    """Detokenize a predictions file and write the resulting Java lines.

    Args:
        argv: Command-line arguments without the program name.
            ``argv[0]`` is the path of the predictions file (one
            space-separated token sequence per line); ``argv[1]`` is the
            directory in which ``predictions_JavaSource.txt`` is created.

    This function never returns normally; it always ends the process through
    ``sys.exit``: status 0 once the output file has been written, status 1
    when an argument is missing or when every prediction converts to an
    empty string.
    """
    if len(argv) < 2:
        sys.stderr.write("Usage: Detokenize.py <predictions_file> <output_dir>\n")
        sys.exit(1)

    # Context managers guarantee both files are closed, even if the
    # conversion or a write raises part-way through.
    with open(argv[0], "r") as predictions_file:
        converted = (toJavaSourceCode(line) for line in predictions_file)
        # toJavaSourceCode returns "" for predictions it rejects; skip those.
        code_lines = [code_line for code_line in converted if code_line != ""]

    if not code_lines:
        sys.stderr.write("All predictions contains <unk> token")
        sys.exit(1)

    output_path = os.path.join(argv[1], "predictions_JavaSource.txt")
    with open(output_path, "w") as output_file:
        output_file.writelines(code_line + "\n" for code_line in code_lines)
    sys.exit(0)
def toJavaSourceCode(prediction):
    """Rebuild one line of Java source from a space-separated token sequence.

    Args:
        prediction: One predicted line. Tokens are separated by spaces and a
            literal space inside a token is encoded as
            ``<seq2seq4repair_space>``.

    Returns:
        The detokenized code line, or ``""`` when the prediction contains an
        ``<unk>`` token or no tokens at all.

    Spacing rules, applied to every token except the last one (which is
    appended as is). DEL means ``isDelimiter(token)`` is true, STR means it
    is false:

    * STR followed by STR     -> token plus one space
    * STR followed by DEL     -> token only
    * ``...`` or ``>``        -> token plus one space
    * ``]`` right after ``[`` -> token plus one space
    * any other DEL           -> token only
    """
    # Repeated spaces would otherwise produce empty tokens; an empty token is
    # classified as a delimiter and would glue its neighbours together
    # ("int  x" -> "intx"), so empty tokens are dropped before processing.
    tokens = [
        token.replace("<seq2seq4repair_space>", " ")
        for token in prediction.strip().split(" ")
        if token
    ]
    # A single unknown token invalidates the whole prediction.
    if "<unk>" in tokens:
        return ""

    delimiter = JavaDelimiter()
    always_spaced = (delimiter.varargs, delimiter.biggerThan)
    last_index = len(tokens) - 1
    parts = []
    for i, token in enumerate(tokens):
        parts.append(token)
        if i == last_index:
            # Nothing follows the final token, so no separator is needed.
            break
        if not isDelimiter(token):
            # STR: separate from a following STR, hug a following DEL.
            needs_space = not isDelimiter(tokens[i + 1])
        elif token in always_spaced:
            needs_space = True
        else:
            # "]" gets a space only when it closes an empty "[ ]" pair.
            needs_space = (
                token == delimiter.rightBrackets
                and i > 0
                and tokens[i - 1] == delimiter.leftBrackets
            )
        if needs_space:
            parts.append(" ")
    return "".join(parts)
def isDelimiter(token):
    """Return True when *token* consists only of punctuation/symbol characters.

    A token counts as a delimiter when none of its characters is a letter,
    a digit, ``_`` or ``$``. Digits and the identifier characters ``_``/``$``
    are deliberately treated as word characters: classifying a numeric
    literal as a delimiter would glue it to a preceding keyword
    (``return 0`` -> ``return0``).

    Args:
        token: A single token string; the empty string is a delimiter.

    Returns:
        bool: True for delimiter tokens, False otherwise.
    """
    return not any(ch.isalnum() or ch in "_$" for ch in token)
class JavaDelimiter:
    """Named constants for the Java delimiter tokens compared against by name.

    The values are plain class-level constants, readable from an instance
    (``JavaDelimiter().varargs``) or from the class itself.
    """

    # No per-instance state. The empty __slots__ also makes assignment to
    # these names on an instance raise AttributeError, so they stay
    # read-only from the instance side.
    __slots__ = ()

    varargs = "..."
    rightBrackets = "]"
    leftBrackets = "["
    biggerThan = ">"
if __name__ == "__main__":
    # Script entry point: hand main() everything after the program name.
    cli_arguments = sys.argv[1:]
    main(cli_arguments)