Skip to content

Commit 3cf7298

Browse files
committed
Basic Lexing
Lexer done
1 parent 10e0476 commit 3cf7298

4 files changed

Lines changed: 235 additions & 1 deletion

File tree

Cargo.lock

Lines changed: 52 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,3 +5,4 @@ edition = "2024"
55

66
[dependencies]
77
clap = { version = "4.5.55", features = ["derive"] }
8+
rayon = "1.11.0"

src/lexer.rs

Lines changed: 175 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,175 @@
1+
/// The kinds of lexical tokens produced by the lexer.
///
/// Keyword variants (`If`, `Else`, `Loop`, `Function`, `Let`, `While`,
/// `Debug`) are recognized from identifier text during lexing;
/// `Identifier` and `Integer` carry the matched source text verbatim.
///
/// `PartialEq`/`Eq` are derived so consumers (e.g. a parser) can compare
/// token kinds directly instead of matching on `Debug` output.
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum TokenType {
    ParensOpen,
    ParensClosed,
    BracesOpen,
    BracesClosed,
    Arrow,
    Colon,
    Semicolon,
    Newline,
    Whitespace,
    Identifier(String),
    Integer(String),
    Dot,
    Plus,
    Minus,
    Mul,
    Div,
    Remainder,
    And,
    Or,
    Xor,
    Not,
    Equals,
    NotEquals,
    GreaterThan,
    GreaterEquals,
    LesserThan,
    LesserEquals,
    If,
    Else,
    Loop,
    Function,
    Assign,
    Let,
    While,
    Debug,
}
39+
40+
/// A single lexed token: its kind plus where it appeared in the source.
#[derive(Clone, Debug)]
pub struct DataToken {
    /// The kind of token (carries the matched text for identifiers/integers).
    pub ty: TokenType,
    /// 1-based source position as (line, column).
    pub pos: (usize /* Line */, usize /* Column */),
}
45+
46+
/// Pushes a `DataToken` of the given `TokenType` variant onto a token
/// vector, converting the 0-based line index into a 1-based line number
/// (the column is passed through already 1-based).
macro_rules! token {
    ($tokens:ident, $line:expr, $col:expr, $variant:ident) => {
        $tokens.push(DataToken {
            ty: TokenType::$variant,
            pos: ($line + 1, $col),
        })
    };
}
54+
55+
pub fn to_tokens(text: &str) -> Vec<DataToken> {
56+
text.lines().enumerate().flat_map(|(line_num, line)| {
57+
let mut tokens: Vec<DataToken> = Vec::new();
58+
59+
let mut char_idx = 0;
60+
let mut chars = line.chars().peekable();
61+
62+
while let Some(curr_char) = chars.next() {
63+
char_idx += 1;
64+
65+
match curr_char {
66+
'(' => token!(tokens, line_num, char_idx, ParensOpen),
67+
')' => token!(tokens, line_num, char_idx, ParensClosed),
68+
'{' => token!(tokens, line_num, char_idx, BracesOpen),
69+
'}' => token!(tokens, line_num, char_idx, BracesClosed),
70+
':' => token!(tokens, line_num, char_idx, Colon),
71+
';' => token!(tokens, line_num, char_idx, Semicolon),
72+
'.' => token!(tokens, line_num, char_idx, Dot),
73+
'+' => token!(tokens, line_num, char_idx, Plus),
74+
'*' => token!(tokens, line_num, char_idx, Mul),
75+
'/' => token!(tokens, line_num, char_idx, Div),
76+
'%' => token!(tokens, line_num, char_idx, Remainder),
77+
'^' => token!(tokens, line_num, char_idx, Xor),
78+
'=' => if chars.peek() == Some(&'=') {
79+
token!(tokens, line_num, char_idx, Equals);
80+
chars.next();
81+
char_idx += 1;
82+
} else {
83+
token!(tokens, line_num, char_idx, Assign)
84+
},
85+
'-' => if chars.peek() == Some(&'>') {
86+
token!(tokens, line_num, char_idx, Arrow);
87+
chars.next();
88+
char_idx += 1;
89+
} else {
90+
token!(tokens, line_num, char_idx, Minus)
91+
}
92+
'!' => if chars.peek() == Some(&'=') {
93+
token!(tokens, line_num, char_idx, NotEquals);
94+
chars.next();
95+
char_idx += 1;
96+
} else {
97+
token!(tokens, line_num, char_idx, Not)
98+
},
99+
'>' => if chars.peek() == Some(&'=') {
100+
token!(tokens, line_num, char_idx, GreaterEquals);
101+
chars.next();
102+
char_idx += 1;
103+
} else {
104+
token!(tokens, line_num, char_idx, GreaterThan)
105+
},
106+
'<' => if chars.peek() == Some(&'<') {
107+
token!(tokens, line_num, char_idx, LesserEquals);
108+
chars.next();
109+
char_idx += 1;
110+
} else {
111+
token!(tokens, line_num, char_idx, LesserThan)
112+
},
113+
'&' => if chars.peek() == Some(&'&') {
114+
token!(tokens, line_num, char_idx, And);
115+
chars.next();
116+
char_idx += 1;
117+
} else {
118+
panic!("Invalid `And` Symbol, expected `&&`, got `{:?}`", chars.peek())
119+
},
120+
'|' => if chars.peek() == Some(&'|') {
121+
token!(tokens, line_num, char_idx, Or);
122+
chars.next();
123+
char_idx += 1;
124+
} else {
125+
panic!("Invalid `Or` Symbol, expected `||`, got `{:?}`", chars.peek())
126+
},
127+
unidentified => if unidentified.is_whitespace() {
128+
token!(tokens, line_num, char_idx, Whitespace)
129+
} else {
130+
let op = if unidentified.is_ascii_digit() {
131+
char::is_ascii_digit
132+
} else {
133+
char::is_ascii_alphabetic
134+
};
135+
136+
let i = char_idx - 1;
137+
let mut j = i;
138+
139+
#[allow(clippy::while_let_on_iterator)]
140+
while let Some(c) = chars.peek() {
141+
if !op(c) || c.is_whitespace() {
142+
break;
143+
} else {
144+
chars.next();
145+
char_idx += 1;
146+
j += 1;
147+
}
148+
}
149+
j += 1;
150+
151+
let text = &line[i..j];
152+
153+
if unidentified.is_numeric() {
154+
tokens.push(DataToken { pos: (line_num+1, i+1), ty: TokenType::Integer(text.to_string()) })
155+
} else if unidentified.is_ascii_alphabetic() {
156+
match text {
157+
"if" => token!(tokens, line_num, i+1, If),
158+
"else" => token!(tokens, line_num, i+1, Else),
159+
"loop" => token!(tokens, line_num, i+1, Loop),
160+
"fn" => token!(tokens, line_num, i+1, Function),
161+
"let" => token!(tokens, line_num, i+1, Let),
162+
"while" => token!(tokens, line_num, i+1, While),
163+
"debug" => token!(tokens, line_num, i+1, Debug),
164+
ident => tokens.push(DataToken { pos: (line_num+1, i+1), ty: TokenType::Identifier(ident.to_string()) })
165+
}
166+
}
167+
},
168+
}
169+
}
170+
171+
tokens.push(DataToken { ty: TokenType::Newline, pos: (line_num + 1, char_idx + 1)});
172+
173+
tokens
174+
}).collect::<Vec<DataToken>>()
175+
}

src/main.rs

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,9 @@
1-
use std::path::PathBuf;
1+
use std::{fs, path::PathBuf};
22

33
use clap::Parser;
44

5+
mod lexer;
6+
57
#[derive(Debug, Clone, Parser)]
68
struct Args {
79
#[arg()]
@@ -11,4 +13,8 @@ struct Args {
1113

1214
fn main() {
1315
let args = Args::parse();
16+
17+
let input_text = fs::read_to_string(args.input).expect("Could not read input file");
18+
19+
eprintln!("{:?}", lexer::to_tokens(&input_text));
1420
}

0 commit comments

Comments
 (0)