Skip to content
This repository was archived by the owner on Sep 6, 2024. It is now read-only.

Commit df42f7a

Browse files
committed
escapes, escapes
1 parent d0c4212 commit df42f7a

5 files changed

Lines changed: 151 additions & 205 deletions

File tree

src/watcompiler/Tokens.txt

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,13 @@
11
BRACKET { } ( ) [ ]
2-
BOOLEAN true false
32
KEYWORD abstract default if private this boolean do implements protected break double import public throws throw byte else instanceof return transient case extends int short try catch interface static void char finally final long strictfp volatile class float native super while const for new switch continue goto package synchronized
4-
UNARYOPERATOR + ++ - -- ! ~
5-
BINARYOPERATOR == * / % < << > >> >>> & ^ | != >= <=
3+
UNARYOPERATOR ++ - -- ! ~
4+
BINARYOPERATOR == * / % < << > >> >>> & ^ | != >= <= +
65
ASSIGNMENTOPERATOR = *= /= %= += -= <<= >>= >>>= &= ^= |=
7-
TERMINAL ; " ,
8-
6+
TERMINAL ; ,
7+
BOOLEAN-LITERAL true false
8+
NULL-LITERAL null
9+
INT-LITERAL <numbers>
10+
STRING-LITERAL <string of anything>
11+
CHARACTER-LITERAL <character or escape>
12+
IDENTIFIER <alphanumeric>
13+
WHITESPACE <spaces,tabs,newlines>

src/watcompiler/lang.clj

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,12 @@
2121
(def DIGITS-NONZERO
2222
(char-range \1 \9))
2323

24+
(def ALL-ASCII
25+
(char-range 32 126))
26+
27+
(def ESCAPABLE
28+
[\b \t \n \f \r \" \' \\])
29+
2430
(def S-PLUS \+)
2531
(def S-MINUS \-)
2632
(def S-STAR \*)

src/watcompiler/nfa.clj

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -79,7 +79,7 @@
7979
transition-map
8080
(let [[s-from s-to alphabets] (first remaining)]
8181
(recur (rest remaining)
82-
(if (seq? alphabets)
82+
(if (or (seq? alphabets) (vector? alphabets))
8383
(reduce #(add-to-map %1 (list s-from %2) s-to)
8484
transition-map
8585
alphabets)

src/watcompiler/re.clj

Lines changed: 69 additions & 108 deletions
Original file line numberDiff line numberDiff line change
@@ -30,10 +30,9 @@
3030

3131
;; Parses a string to form the nfa
3232
(defn string-to-nfa
33-
[word wordtype]
33+
[wordtype word]
3434
(let
3535
[stateS (gensym :s)
36-
3736
;; List of substrings of word, stored as strings
3837
states-map (set (rest (reductions str (str) word)))
3938

@@ -51,7 +50,7 @@
5150
transitions-map (into #{ [stateS (get gensym-map (str (first (seq word))) \a) (first (seq word))] }
5251
(for [v (partition 2 1 (vals gensym-map))]
5352
[(first v) (second v) (get states-char-map (second v))]))]
54-
(make-NFA (into #{} )
53+
(make-NFA (into #{} (concat (seq word)))
5554
states-map
5655
stateS
5756
accept-states-map
@@ -67,7 +66,7 @@
6766
args (rest arguments)
6867
;; Key: string for keyword, Value: NFA for that keyword
6968
strings-nfas (into (sorted-map) (for [nfa-name args]
70-
[nfa-name (string-to-nfa nfa-name class)]))
69+
[nfa-name (string-to-nfa class nfa-name)]))
7170
all-states (apply union (map :states (vals strings-nfas)))
7271
all-accept-states (apply union (map :accept-states (vals strings-nfas)))
7372
merged-transitions (apply merge (map :transitions (vals strings-nfas)))
@@ -88,21 +87,24 @@
8887

8988
;; Reading the file
9089
(def readFile
91-
(with-open [rdr (clojure.java.io/reader "src/watcompiler/Tokens.txt")]
92-
(reduce conj [] (line-seq rdr))))
93-
94-
;; Splitting the lines by space
95-
(def splitLines
9690
(into []
97-
(for [x readFile]
98-
(str/split x #" "))))
91+
(for [line (str/split-lines (slurp "src/watcompiler/Tokens.txt"))]
92+
(str/split line #" "))))
9993

10094
(def fileFormed-nfa
101-
(let [nfas
102-
(into []
103-
(for [x splitLines]
104-
(apply form-multiple-nfas x)))]
105-
(apply merge-nfas nfas)))
95+
(let [nfas (into []
96+
(for [x readFile]
97+
;; Skip the placeholder entries for regex-based token classes (yield nil, removed before merging)
98+
(if
99+
(or
100+
(if (= (first x) "IDENTIFIER") true false)
101+
(if (= (first x) "INT-LITERAL") true false)
102+
(if (= (first x) "STRING-LITERAL") true false)
103+
(if (= (first x) "CHARACTER-LITERAL") true false)
104+
(if (= (first x) "WHITESPACE") true false))
105+
nil
106+
(apply form-multiple-nfas x))))]
107+
(apply merge-nfas (remove nil? nfas))))
106108

107109
;; NFAs for types
108110

@@ -112,36 +114,59 @@
112114
(let [stateS (gensym :S)
113115
state1 (gensym :1)
114116
state2 (gensym :2)]
115-
(make-NFA (into #{} )
117+
(make-NFA (into #{} (concat [\0] DIGITS DIGITS-NONZERO))
116118
#{stateS state1 state2}
117119
stateS
118-
{state2 (list :INTEGER 0)}
119-
(make-transition-NFA [[stateS state1 e]
120-
[state1 state2 DIGITS-NONZERO]
120+
{state1 (list :INTEGER-LITERAL 0)
121+
state2 (list :INTEGER-LITERAL 0)}
122+
(make-transition-NFA [[stateS state1 \0]
123+
[stateS state2 DIGITS-NONZERO]
121124
[state2 state2 DIGITS]]))))
122125

123126
;; String literal
124-
;; \".*\" (\ shown for escaping ")
127+
;; \"(\\[btnfr\"\'\\] | ALL-ASCII)*\" (\ shown for escaping ")
128+
;; aka \"(.*)\" with escapes inside
125129
(def string-literal-nfa
126130
(let [stateS (gensym :S)
127131
state1 (gensym :1)
128-
state2 (gensym :2)]
129-
(make-NFA (into #{} )
130-
#{stateS state1 state2}
132+
state2 (gensym :2)
133+
state3 (gensym :3)]
134+
(make-NFA (into #{} (concat [\'] ALL-ASCII [\\] ESCAPABLE))
135+
#{stateS state1 state2 state3}
136+
stateS
137+
{state3 (list :STRING-LITERAL 0)}
138+
(make-transition-NFA [[stateS state1 \"]
139+
[state1 state1 ALL-ASCII]
140+
[state1 state2 \\]
141+
[state2 state1 ESCAPABLE]
142+
[state1 state3 \"]]))))
143+
144+
;; Character literal
145+
;; \'(\\ESCAPABLE | ALL-ASCII)\' (\ shown for escaping ')
146+
;; aka \'.\' -- exactly one character or one escape sequence between the quotes
147+
(def character-literal-nfa
148+
(let [stateS (gensym :S)
149+
state1 (gensym :1)
150+
state2 (gensym :2)
151+
state3 (gensym :3)
152+
state4 (gensym :4)]
153+
(make-NFA (into #{} (concat [\'] ALL-ASCII [\\] ESCAPABLE))
154+
#{stateS state1 state2 state3 state4}
131155
stateS
132-
{state2 (list :STRING-LITERAL 0)}
133-
(make-transition-NFA [[stateS state1 "\""]
134-
[state1 state1 UPPER-ALPHABET]
135-
[state1 state1 LOWER-ALPHABET]
136-
[state1 state2 "\""]]))))
156+
{state4 (list :CHARACTER-LITERAL 0)}
157+
(make-transition-NFA [[stateS state1 \']
158+
[state1 state3 ALL-ASCII]
159+
[state1 state2 \\]
160+
[state2 state3 ESCAPABLE]
161+
[state3 state4 \']]))))
137162

138163
;; Identifiers
139164
;; [a-zA-Z][a-zA-Z0-9]*
140165
(def identifier-nfa
141166
(let [stateS (gensym :S)
142167
state1 (gensym :s1)
143168
state2 (gensym :s2)]
144-
(make-NFA (into #{} )
169+
(make-NFA (into #{} (concat UPPER-ALPHABET LOWER-ALPHABET DIGITS))
145170
#{stateS state1 state2}
146171
stateS
147172
{state1 (list :IDENTIFIER 1)
@@ -153,88 +178,24 @@
153178
[state2 state2 UPPER-ALPHABET]
154179
[state2 state2 LOWER-ALPHABET]
155180
[state2 state2 DIGITS]]))))
156-
;; Operators
157-
(def operators-nfa
158-
(form-multiple-nfas :OPERATOR ">" "<" "<<" ">>" ">>>" "<<<" ">>>=" ">>="
159-
">=" "<=" "&" "&=" "=" "==" "!" "!=" "^=" "^" "+" "+="
160-
"++" "-" "-=" "--" "*" "*=" "/" "/=" "%" "%="))
161-
162-
;; white space?
163-
164-
;; Keywords nfa
165-
(def keywords-nfa
166-
(form-multiple-nfas :KEYWORD
167-
"abstract"
168-
"default"
169-
"if"
170-
"private"
171-
"this"
172-
"boolean"
173-
"do"
174-
"implements"
175-
"protected"
176-
"break"
177-
"double"
178-
"import"
179-
"public"
180-
"throws"
181-
"throw"
182-
"byte"
183-
"else"
184-
"instanceof"
185-
"return"
186-
"transient"
187-
"case"
188-
"extends"
189-
"int"
190-
"short"
191-
"try"
192-
"catch"
193-
"interface"
194-
"static"
195-
"void"
196-
"char"
197-
"finally"
198-
"final"
199-
"long"
200-
"strictfp"
201-
"volatile"
202-
"class"
203-
"float"
204-
"native"
205-
"super"
206-
"while"
207-
"const"
208-
"for"
209-
"new"
210-
"switch"
211-
"continue"
212-
"goto"
213-
"package"
214-
"synchronized"))
215-
216-
;; Booleans
217-
(def boolean-nfa
218-
(form-multiple-nfas :BOOLEAN "true" "false"))
219-
220-
;; Brackets
221-
(def bracket-nfa
222-
(form-multiple-nfas :BRACKET "{" "}" "(" ")" "[" "]"))
181+
182+
;; Whitespace
183+
;; [space tab newline]+
184+
(def whitespace-nfa
185+
(let [stateS (gensym :S)
186+
state1 (gensym :s1)]
187+
(make-NFA (into #{} WHITESPACE)
188+
#{stateS state1}
189+
stateS
190+
{state1 (list :WHITESPACE 0)}
191+
(make-transition-NFA [[stateS state1 WHITESPACE]
192+
[state1 state1 WHITESPACE]]))))
223193

224194
;; complete nfa from all of the individual RE nfas
225-
;; int-literal
226-
;; string-literal
227-
;; identifiers
228-
;; file specified nfas:
229-
;; BRACKET
230-
;; BOOLEAN
231-
;; KEYWORD
232-
;; UNARYOPERATOR
233-
;; BINARYOPERATOR
234-
;; ASSIGNMENTOPERATOR
235-
;; TERMINAL
236195
(def complete-nfa
237196
(merge-nfas integer-literal-nfa
238197
string-literal-nfa
198+
character-literal-nfa
239199
identifier-nfa
200+
whitespace-nfa
240201
fileFormed-nfa))

0 commit comments

Comments
 (0)