1
+ use std:: collections:: HashMap ;
2
+ use std:: iter:: Peekable ;
3
+ use std:: str:: Chars ;
4
+ use crate :: token:: { KeywordTable , Token , TokenKind } ;
5
+
6
/// Lexer that splits source text into a stream of tokens.
pub struct Lexer {
    // 1-based line number of the character currently being scanned;
    // u16 limits inputs to 65 535 lines — TODO confirm that is acceptable.
    current_line: u16,
}
10
+
11
+ impl Lexer {
12
+ /// Создает новый экземпляр лексера. Отсчет номеров строк токенов начинается с 1.
13
+ pub fn new ( ) -> Self { Lexer { current_line : 1 } }
14
+
15
+ /// Осуществляет лексинг переданного исходного текста. Возвращает вектор токенов, включающих в себя
16
+ /// как простой текст, так и токены инструкций препроцессора
17
+ pub fn lex ( & mut self , source : & str ) -> Vec < Token > {
18
+ let mut chars = source. chars ( ) . peekable ( ) ;
19
+ let mut tokens: Vec < Token > = Vec :: new ( ) ;
20
+ let keywords_table = KeywordTable :: new ( ) ;
21
+
22
+ while let Some ( char) = chars. peek ( ) {
23
+ match char {
24
+ '#' => {
25
+ chars. next ( ) ;
26
+
27
+ if match_char ( '!' , & mut chars) {
28
+ let token = Token :: new ( TokenKind :: Shebang , "#!" . to_string ( ) , self
29
+ . current_line , self . current_line ) ;
30
+ tokens. push ( token) ;
31
+ Lexer :: shebang_text ( self , & mut tokens, & mut chars) ;
32
+ } else {
33
+ let token = Token :: new ( TokenKind :: Hash , "#" . to_string ( ) , self
34
+ . current_line , self . current_line ) ;
35
+ tokens. push ( token) ;
36
+ Lexer :: preprocessor_line ( self , & mut tokens, & mut chars, & keywords_table) ;
37
+ }
38
+ }
39
+ _ => {
40
+ Lexer :: text ( self , & mut tokens, & mut chars) ;
41
+ }
42
+ }
43
+ }
44
+
45
+ tokens
46
+ }
47
+
48
+ fn shebang_text ( & mut self , tokens : & mut Vec < Token > , chars : & mut Peekable < Chars > ) {
49
+ let mut text_chars: Vec < char > = Vec :: new ( ) ;
50
+ let start_line = self . current_line ;
51
+ let end_line = self . current_line ;
52
+
53
+ while let Some ( char) = chars. peek ( ) {
54
+ match char {
55
+ '\n' => {
56
+ break ;
57
+ }
58
+ _ => {
59
+ text_chars. push ( * char) ;
60
+ chars. next ( ) ;
61
+ }
62
+ }
63
+ }
64
+
65
+ if !text_chars. is_empty ( ) {
66
+ let token = Token :: new ( TokenKind :: Text , text_chars. iter ( ) . collect ( ) , start_line, end_line) ;
67
+ tokens. push ( token) ;
68
+ }
69
+ }
70
+
71
+ fn preprocessor_line ( & mut self , tokens : & mut Vec < Token > , chars : & mut Peekable < Chars > , keywords : & KeywordTable ) {
72
+ while let Some ( char) = chars. peek ( ) {
73
+ match char {
74
+ '\n' => {
75
+ break ;
76
+ }
77
+ char if char. is_alphabetic ( ) || * char == '_' => {
78
+ let token = Lexer :: identifier ( self , chars, & keywords. table ) ;
79
+ tokens. push ( token) ;
80
+ }
81
+ '"' => {
82
+ let mut token = Lexer :: string ( self , chars) ;
83
+ token. token_kind = TokenKind :: Path ;
84
+ tokens. push ( token) ;
85
+ }
86
+ _ => {
87
+ chars. next ( ) ;
88
+ }
89
+ }
90
+ }
91
+ }
92
+
93
+ fn identifier ( & mut self , chars : & mut Peekable < Chars > , keywords : & HashMap < String , TokenKind > ) -> Token {
94
+ let mut text_chars: Vec < char > = Vec :: new ( ) ;
95
+
96
+ while let Some ( char) = chars. peek ( ) {
97
+ if char. is_alphabetic ( ) || * char == '_' {
98
+ text_chars. push ( * char) ;
99
+ chars. next ( ) ;
100
+ } else {
101
+ break ;
102
+ }
103
+ }
104
+
105
+ let identifier: String = String :: from_iter ( text_chars) ;
106
+ if let Some ( token_kind) = keywords. get ( identifier. to_uppercase ( ) . as_str ( ) ) {
107
+ return Token :: new ( * token_kind, identifier, self . current_line , self . current_line ) ;
108
+ }
109
+
110
+ return Token :: new ( TokenKind :: Identifier , identifier, self . current_line , self . current_line ) ;
111
+ }
112
+
113
+ fn text ( & mut self , tokens : & mut Vec < Token > , chars : & mut Peekable < Chars > ) {
114
+ let mut text_chars: Vec < char > = Vec :: new ( ) ;
115
+ let mut string_or_date = false ;
116
+ let start_line = self . current_line ;
117
+ let mut end_line = self . current_line ;
118
+
119
+ while let Some ( char) = chars. peek ( ) {
120
+ match char {
121
+ '#' => {
122
+ if !string_or_date {
123
+ break ;
124
+ } else {
125
+ text_chars. push ( * char) ;
126
+ chars. next ( ) ;
127
+ }
128
+ }
129
+ '"' | '\'' => {
130
+ if string_or_date == false {
131
+ string_or_date = true
132
+ } else {
133
+ string_or_date = false
134
+ }
135
+ text_chars. push ( * char) ;
136
+ chars. next ( ) ;
137
+ }
138
+ '\n' => {
139
+ self . current_line = self . current_line + 1 ;
140
+ end_line = end_line + 1 ;
141
+ text_chars. push ( * char) ;
142
+ chars. next ( ) ;
143
+ }
144
+ _ => {
145
+ text_chars. push ( * char) ;
146
+ chars. next ( ) ;
147
+ }
148
+ }
149
+ }
150
+
151
+ let token = Token :: new ( TokenKind :: Text , text_chars. into_iter ( ) . collect ( ) ,
152
+ start_line, end_line) ;
153
+ tokens. push ( token) ;
154
+ }
155
+
156
+ fn string ( & mut self , chars : & mut Peekable < Chars > ) -> Token {
157
+ let mut text_chars: Vec < char > = Vec :: new ( ) ;
158
+ // add first quote symbol
159
+ text_chars. push ( chars. next ( ) . unwrap ( ) ) ;
160
+ let start_line = self . current_line ;
161
+ let mut end_line = self . current_line ;
162
+
163
+ while let Some ( char) = chars. next ( ) {
164
+ match char {
165
+ char if char == '"' => {
166
+ text_chars. push ( char) ;
167
+ break ;
168
+ }
169
+ '\n' => {
170
+ self . current_line = self . current_line + 1 ;
171
+ end_line = end_line + 1 ;
172
+ text_chars. push ( char) ;
173
+ }
174
+ _ => { text_chars. push ( char) }
175
+ }
176
+ }
177
+
178
+ Token :: new ( TokenKind :: Text , text_chars. into_iter ( ) . collect ( ) , start_line, end_line)
179
+ }
180
+ }
181
+
182
/// Consumes the next character if and only if it equals `expected`.
///
/// Returns `true` when the character matched (and was consumed); returns
/// `false` — leaving the iterator untouched — when it differs or the input
/// is exhausted.
fn match_char(expected: char, chars: &mut Peekable<Chars>) -> bool {
    match chars.peek() {
        Some(&c) if c == expected => {
            // Consume the matched character.
            chars.next();
            true
        }
        _ => false,
    }
}
0 commit comments