|
30 | 30 |
|
31 | 31 | ;; Parses a string to form the nfa |
32 | 32 | (defn string-to-nfa |
33 | | - [word wordtype] |
| 33 | + [wordtype word] |
34 | 34 | (let |
35 | 35 | [stateS (gensym :s) |
36 | | - |
37 | 36 | ;; List of substrings of word, stored as strings |
38 | 37 | states-map (set (rest (reductions str (str) word))) |
39 | 38 |
|
|
51 | 50 | transitions-map (into #{ [stateS (get gensym-map (str (first (seq word))) \a) (first (seq word))] } |
52 | 51 | (for [v (partition 2 1 (vals gensym-map))] |
53 | 52 | [(first v) (second v) (get states-char-map (second v))]))] |
54 | | - (make-NFA (into #{} ) |
| 53 | + (make-NFA (into #{} (concat (seq word))) |
55 | 54 | states-map |
56 | 55 | stateS |
57 | 56 | accept-states-map |
|
67 | 66 | args (rest arguments) |
68 | 67 | ;; Key: string for keyword, Value: NFA for that keyword |
69 | 68 | strings-nfas (into (sorted-map) (for [nfa-name args] |
70 | | - [nfa-name (string-to-nfa nfa-name class)])) |
| 69 | + [nfa-name (string-to-nfa class nfa-name)])) |
71 | 70 | all-states (apply union (map :states (vals strings-nfas))) |
72 | 71 | all-accept-states (apply union (map :accept-states (vals strings-nfas))) |
73 | 72 | merged-transitions (apply merge (map :transitions (vals strings-nfas))) |
|
88 | 87 |
|
89 | 88 | ;; Reading the file and splitting each line on spaces |
90 | 89 | (def readFile |
91 | | - (with-open [rdr (clojure.java.io/reader "src/watcompiler/Tokens.txt")] |
92 | | - (reduce conj [] (line-seq rdr)))) |
93 | | - |
94 | | -;; Splitting the lines by space |
95 | | -(def splitLines |
96 | 90 | (into [] |
97 | | - (for [x readFile] |
98 | | - (str/split x #" ")))) |
| 91 | + (for [line (str/split-lines (slurp "src/watcompiler/Tokens.txt"))] |
| 92 | + (str/split line #" ")))) |
99 | 93 |
|
100 | 94 | (def fileFormed-nfa |
101 | | - (let [nfas |
102 | | - (into [] |
103 | | - (for [x splitLines] |
104 | | - (apply form-multiple-nfas x)))] |
105 | | - (apply merge-nfas nfas))) |
| 95 | + (let [nfas (into [] |
| 96 | + (for [x readFile] |
| 97 | + ;; Skip the regex based token classes (their NFAs are built by hand); all other lines are formed from the file |
| 98 | + (if |
| 99 | + (or |
| 100 | + (if (= (first x) "IDENTIFIER") true false) |
| 101 | + (if (= (first x) "INT-LITERAL") true false) |
| 102 | + (if (= (first x) "STRING-LITERAL") true false) |
| 103 | + (if (= (first x) "CHARACTER-LITERAL") true false) |
| 104 | + (if (= (first x) "WHITESPACE") true false)) |
| 105 | + nil |
| 106 | + (apply form-multiple-nfas x))))] |
| 107 | + (apply merge-nfas (remove nil? nfas)))) |
106 | 108 |
|
107 | 109 | ;; NFAs for types |
108 | 110 |
|
|
112 | 114 | (let [stateS (gensym :S) |
113 | 115 | state1 (gensym :1) |
114 | 116 | state2 (gensym :2)] |
115 | | - (make-NFA (into #{} ) |
| 117 | + (make-NFA (into #{} (concat [\0] DIGITS DIGITS-NONZERO)) |
116 | 118 | #{stateS state1 state2} |
117 | 119 | stateS |
118 | | - {state2 (list :INTEGER 0)} |
119 | | - (make-transition-NFA [[stateS state1 e] |
120 | | - [state1 state2 DIGITS-NONZERO] |
| 120 | + {state1 (list :INTEGER-LITERAL 0) |
| 121 | + state2 (list :INTEGER-LITERAL 0)} |
| 122 | + (make-transition-NFA [[stateS state1 \0] |
| 123 | + [stateS state2 DIGITS-NONZERO] |
121 | 124 | [state2 state2 DIGITS]])))) |
122 | 125 |
|
123 | 126 | ;; String literal |
124 | | -;; \".*\" (\ shown for escaping ") |
| 127 | +;; \"(\\[btnfr\"\'\\] | ALL-ASCII)*\" (\ shown for escaping ") |
| 128 | +;; aka \"(.*)\" with escapes inside |
125 | 129 | (def string-literal-nfa |
126 | 130 | (let [stateS (gensym :S) |
127 | 131 | state1 (gensym :1) |
128 | | - state2 (gensym :2)] |
129 | | - (make-NFA (into #{} ) |
130 | | - #{stateS state1 state2} |
| 132 | + state2 (gensym :2) |
| 133 | + state3 (gensym :3)] |
| 134 | + (make-NFA (into #{} (concat [\'] ALL-ASCII [\\] ESCAPABLE)) |
| 135 | + #{stateS state1 state2 state3} |
| 136 | + stateS |
| 137 | + {state3 (list :STRING-LITERAL 0)} |
| 138 | + (make-transition-NFA [[stateS state1 \"] |
| 139 | + [state1 state1 ALL-ASCII] |
| 140 | + [state1 state2 \\] |
| 141 | + [state2 state1 ESCAPABLE] |
| 142 | + [state1 state3 \"]])))) |
| 143 | + |
| 144 | +;; Character literal |
| 145 | +;; \'(\\ESCAPABLE | ALL-ASCII)\' (\ shown for escaping ') |
| 146 | +;; aka \'.\' with exactly one character or one escape inside (no repetition) |
| 147 | +(def character-literal-nfa |
| 148 | + (let [stateS (gensym :S) |
| 149 | + state1 (gensym :1) |
| 150 | + state2 (gensym :2) |
| 151 | + state3 (gensym :3) |
| 152 | + state4 (gensym :4)] |
| 153 | + (make-NFA (into #{} (concat [\'] ALL-ASCII [\\] ESCAPABLE)) |
| 154 | + #{stateS state1 state2 state3 state4} |
131 | 155 | stateS |
132 | | - {state2 (list :STRING-LITERAL 0)} |
133 | | - (make-transition-NFA [[stateS state1 "\""] |
134 | | - [state1 state1 UPPER-ALPHABET] |
135 | | - [state1 state1 LOWER-ALPHABET] |
136 | | - [state1 state2 "\""]])))) |
| 156 | + {state4 (list :CHARACTER-LITERAL 0)} |
| 157 | + (make-transition-NFA [[stateS state1 \'] |
| 158 | + [state1 state3 ALL-ASCII] |
| 159 | + [state1 state2 \\] |
| 160 | + [state2 state3 ESCAPABLE] |
| 161 | + [state3 state4 \']])))) |
137 | 162 |
|
138 | 163 | ;; Identifiers |
139 | 164 | ;; [a-zA-Z][a-zA-Z0-9]* |
140 | 165 | (def identifier-nfa |
141 | 166 | (let [stateS (gensym :S) |
142 | 167 | state1 (gensym :s1) |
143 | 168 | state2 (gensym :s2)] |
144 | | - (make-NFA (into #{} ) |
| 169 | + (make-NFA (into #{} (concat UPPER-ALPHABET LOWER-ALPHABET DIGITS)) |
145 | 170 | #{stateS state1 state2} |
146 | 171 | stateS |
147 | 172 | {state1 (list :IDENTIFIER 1) |
|
153 | 178 | [state2 state2 UPPER-ALPHABET] |
154 | 179 | [state2 state2 LOWER-ALPHABET] |
155 | 180 | [state2 state2 DIGITS]])))) |
156 | | -;; Operators |
157 | | -(def operators-nfa |
158 | | - (form-multiple-nfas :OPERATOR ">" "<" "<<" ">>" ">>>" "<<<" ">>>=" ">>=" |
159 | | - ">=" "<=" "&" "&=" "=" "==" "!" "!=" "^=" "^" "+" "+=" |
160 | | - "++" "-" "-=" "--" "*" "*=" "/" "/=" "%" "%=")) |
161 | | - |
162 | | -;; white space? |
163 | | - |
164 | | -;; Keywords nfa |
165 | | -(def keywords-nfa |
166 | | - (form-multiple-nfas :KEYWORD |
167 | | - "abstract" |
168 | | - "default" |
169 | | - "if" |
170 | | - "private" |
171 | | - "this" |
172 | | - "boolean" |
173 | | - "do" |
174 | | - "implements" |
175 | | - "protected" |
176 | | - "break" |
177 | | - "double" |
178 | | - "import" |
179 | | - "public" |
180 | | - "throws" |
181 | | - "throw" |
182 | | - "byte" |
183 | | - "else" |
184 | | - "instanceof" |
185 | | - "return" |
186 | | - "transient" |
187 | | - "case" |
188 | | - "extends" |
189 | | - "int" |
190 | | - "short" |
191 | | - "try" |
192 | | - "catch" |
193 | | - "interface" |
194 | | - "static" |
195 | | - "void" |
196 | | - "char" |
197 | | - "finally" |
198 | | - "final" |
199 | | - "long" |
200 | | - "strictfp" |
201 | | - "volatile" |
202 | | - "class" |
203 | | - "float" |
204 | | - "native" |
205 | | - "super" |
206 | | - "while" |
207 | | - "const" |
208 | | - "for" |
209 | | - "new" |
210 | | - "switch" |
211 | | - "continue" |
212 | | - "goto" |
213 | | - "package" |
214 | | - "synchronized")) |
215 | | - |
216 | | -;; Booleans |
217 | | -(def boolean-nfa |
218 | | - (form-multiple-nfas :BOOLEAN "true" "false")) |
219 | | - |
220 | | -;; Brackets |
221 | | -(def bracket-nfa |
222 | | - (form-multiple-nfas :BRACKET "{" "}" "(" ")" "[" "]")) |
| 181 | + |
| 182 | +;; Whitespace |
| 183 | +;; [space tab newline]+ |
| 184 | +(def whitespace-nfa |
| 185 | + (let [stateS (gensym :S) |
| 186 | + state1 (gensym :s1)] |
| 187 | + (make-NFA (into #{} WHITESPACE) |
| 188 | + #{stateS state1} |
| 189 | + stateS |
| 190 | + {state1 (list :WHITESPACE 0)} |
| 191 | + (make-transition-NFA [[stateS state1 WHITESPACE] |
| 192 | + [state1 state1 WHITESPACE]])))) |
223 | 193 |
|
224 | 194 | ;; complete nfa: merge of all the individual RE nfas and the file-formed nfa |
225 | | -;; int-literal |
226 | | -;; string-literal |
227 | | -;; identifiers |
228 | | -;; file specified nfas: |
229 | | -;; BRACKET |
230 | | -;; BOOLEAN |
231 | | -;; KEYWORD |
232 | | -;; UNARYOPERATOR |
233 | | -;; BINARYOPERATOR |
234 | | -;; ASSIGNMENTOPERATOR |
235 | | -;; TERMINAL |
236 | 195 | (def complete-nfa |
237 | 196 | (merge-nfas integer-literal-nfa |
238 | 197 | string-literal-nfa |
| 198 | + character-literal-nfa |
239 | 199 | identifier-nfa |
| 200 | + whitespace-nfa |
240 | 201 | fileFormed-nfa)) |
0 commit comments