-
Notifications
You must be signed in to change notification settings - Fork 5
Expand file tree
/
Copy pathregexp.py
More file actions
126 lines (103 loc) · 3.77 KB
/
regexp.py
File metadata and controls
126 lines (103 loc) · 3.77 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
import lexer
import fa
class Token(lexer.IToken):
    """A single lexical unit of a regular expression.

    Only the three operator kinds (alternation, concatenation, closure)
    have entries in ``_OP_PRECEDENCE_MAP``; operands and parentheses do
    not participate in precedence comparison.
    """
    ALTERNATION_KIND = 'ALTERNATION'
    CONCATENATION_KIND = 'CONCATENATION'
    CLOSURE_KIND = 'CLOSURE'
    OPERAND_KIND = 'OPERAND'
    LEFT_PARENTHESIS_KIND = 'LEFT_PARENTHESIS_KIND'
    RIGHT_PARENTHESIS_KIND = 'RIGHT_PARENTHESIS_KIND'

    # A larger value means tighter binding: closure > concatenation >
    # alternation.  Only relative order matters; the values themselves
    # are arbitrary.
    _OP_PRECEDENCE_MAP = {
        ALTERNATION_KIND: 0x66,
        CONCATENATION_KIND: 0xcc,
        CLOSURE_KIND: 0xff,
    }

    def __init__(self, kind, lexeme):
        """
        :param kind: one of the ``*_KIND`` class constants.
        :param lexeme: the source text this token was read from.
        """
        self.kind = kind
        self.lexeme = lexeme

    def is_operator(self):
        # Everything that is not a plain operand -- including the
        # parentheses -- is treated as an operator by the parser.
        return self.kind != Token.OPERAND_KIND

    def is_left_parenthesis(self):
        return self.kind == Token.LEFT_PARENTHESIS_KIND

    def is_right_parenthesis(self):
        return self.kind == Token.RIGHT_PARENTHESIS_KIND

    def compare_operator_precedence(self, right):
        """Return <0, 0 or >0 as ``self`` binds looser than, equal to,
        or tighter than ``right``.

        :type right: Token
        :raises KeyError: if either token's kind has no precedence
            entry (operands and parentheses).
        """
        return Token._OP_PRECEDENCE_MAP[self.kind] - Token._OP_PRECEDENCE_MAP[right.kind]

    def __str__(self):
        return '<%s, %s>' % (self.kind, self.lexeme)
class Lexer(lexer.ILexer):
    """Tokenizer for regular expressions built from ``|``, ``*``, ``(``,
    ``)`` and arbitrary single-character operands.

    An explicit CONCATENATION token is synthesized between two adjacent
    units that are implicitly concatenated -- e.g. between ``a`` and
    ``b`` in ``"ab"``, or between ``)`` and ``(`` in ``")("``.
    """
    ALTERNATION_TOKEN = Token(Token.ALTERNATION_KIND, '|')
    CONCATENATION_TOKEN = Token(Token.CONCATENATION_KIND, '')
    CLOSURE_TOKEN = Token(Token.CLOSURE_KIND, '*')
    LEFT_PARENTHESIS_TOKEN = Token(Token.LEFT_PARENTHESIS_KIND, '(')
    RIGHT_PARENTHESIS_TOKEN = Token(Token.RIGHT_PARENTHESIS_KIND, ')')

    # Metacharacters that never need an implicit concatenation inserted
    # in front of them; they map straight to their singleton token.
    _META_TOKEN_MAP = {
        ')': RIGHT_PARENTHESIS_TOKEN,
        '|': ALTERNATION_TOKEN,
        '*': CLOSURE_TOKEN,
    }

    # Kinds that can end a sub-expression; when the next character starts
    # a new operand or group, an implicit CONCATENATION must be emitted.
    _CONCAT_AFTER_KINDS = (
        Token.OPERAND_KIND,
        Token.CLOSURE_KIND,
        Token.RIGHT_PARENTHESIS_KIND,
    )

    def __init__(self, input_data):
        """
        :param input_data: the regular expression source string.
        """
        self.data = input_data
        self.pos = 0
        # Most recently returned token; drives implicit-concatenation
        # detection in next().
        self.pre_token = None

    def next(self):
        """Return the next token, or ``None`` at end of input."""
        if self.pos >= len(self.data):
            return None
        ch = self.data[self.pos]
        meta = Lexer._META_TOKEN_MAP.get(ch)
        if meta is not None:
            self.pre_token = meta
            self.pos += 1
            return self.pre_token
        # ch starts an operand or a '(' group.  If the previous token
        # could end a sub-expression, emit an implicit CONCATENATION
        # first -- without consuming ch, so it is re-read next call.
        if self.pre_token is not None and \
                self.pre_token.kind in Lexer._CONCAT_AFTER_KINDS:
            self.pre_token = Lexer.CONCATENATION_TOKEN
            return self.pre_token
        if ch == '(':
            self.pre_token = Lexer.LEFT_PARENTHESIS_TOKEN
        else:
            self.pre_token = Token(Token.OPERAND_KIND, ch)
        self.pos += 1
        return self.pre_token
def execute_rpn(tokens):
    """Evaluate a regular expression given in reverse polish notation,
    building the corresponding finite automaton.

    :type tokens: list[Token|lexer.IToken]
    :rtype: fa.FA
    :raises RuntimeError: if the stream is not well-formed RPN -- an
        operator arrives with too few operands, or operands are left
        over at the end.
    """
    fa_stack = []
    for token in tokens:
        if not token.is_operator():
            # An operand pushes a fresh single-letter automaton.
            fa_stack.append(fa.FA.build_from_letter(token.lexeme))
            continue
        if token.kind == Token.CLOSURE_KIND:
            # Unary: Kleene star applied in place to the stack top.
            if not fa_stack:
                raise RuntimeError("Unexpected Token: %s" % token)
            fa_stack[-1].closure()
        elif token.kind == Token.CONCATENATION_KIND:
            # Binary: fold the top automaton into the one below it.
            if len(fa_stack) <= 1:
                raise RuntimeError("Unexpected Token: %s" % token)
            fa_stack[-2].concat(fa_stack[-1])
            fa_stack.pop()
        else:
            assert token.kind == Token.ALTERNATION_KIND
            if len(fa_stack) <= 1:
                raise RuntimeError("Unexpected Token: %s" % token)
            fa_stack[-2].alternate(fa_stack[-1])
            fa_stack.pop()
    # A well-formed expression reduces to exactly one automaton.
    if len(fa_stack) != 1:
        raise RuntimeError("Lack Operator")
    return fa_stack[0]