tangled
alpha
login
or
join now
gearsco.de
/
pearl
2
fork
atom
An Erlang lexer and syntax highlighter in Gleam
2
fork
atom
overview
issues
pulls
pipelines
Lex characters
gearsco.de
11 months ago
50df07b2
65b5b9f1
+180
-2
2 changed files
expand all
collapse all
unified
split
src
pearl
token.gleam
pearl.gleam
+178
src/pearl.gleam
···
19
19
until_end_of_line: Splitter,
20
20
string: Splitter,
21
21
quoted_atom: Splitter,
22
22
+
brace_escape_sequence: Splitter,
22
23
)
23
24
}
24
25
···
30
31
NumericSeparatorNotAllowed
31
32
ExpectedExponent
32
33
NumbersCannotEndAfterRadix
34
34
+
UnterminatedCharacter
35
35
+
UnterminatedEscapeSequence
33
36
}
34
37
35
38
pub fn new(source: String) -> Lexer {
···
47
50
until_end_of_line: splitter.new(["\n", "\r"]),
48
51
string: splitter.new(["\"", "\\"]),
49
52
quoted_atom: splitter.new(["'", "\\"]),
53
53
+
brace_escape_sequence: splitter.new(["}", "\n", "\r"]),
50
54
)
51
55
}
52
56
···
209
213
"\"" <> source -> lex_string(advance(lexer, source), "")
210
214
"'" <> source -> lex_quoted_atom(advance(lexer, source), "")
211
215
216
216
+
"$" <> source -> lex_character(advance(lexer, source))
217
217
+
212
218
_ ->
213
219
case string.pop_grapheme(lexer.source) {
214
220
Error(_) -> #(lexer, token.EndOfFile)
···
217
223
token.Unknown(char),
218
224
)
219
225
}
226
226
+
}
227
227
+
}
228
228
+
229
229
+
/// Lexes a character literal. The caller has already consumed the leading `$`.
///
/// Two forms are handled:
/// - a backslash escape, delegated to `lex_escape_sequence`; the backslash is
///   put back in front of the returned sequence text for the token;
/// - any other single grapheme, which becomes the character as-is.
///
/// If the input ends right after the `$`, `UnterminatedCharacter` is passed to
/// `error` and an empty `Character` token is returned.
fn lex_character(lexer: Lexer) -> #(Lexer, Token) {
  case lexer.source {
    "\\" <> source -> {
      let #(lexer, escape_sequence) =
        lex_escape_sequence(advance(lexer, source))
      #(lexer, token.Character("\\" <> escape_sequence))
    }
    _ ->
      case string.pop_grapheme(lexer.source) {
        // `string.pop_grapheme` yields `#(first_grapheme, rest)`: the first
        // element is the character, the second is the remaining source to
        // continue lexing from.
        Ok(#(char, rest)) -> #(advance(lexer, rest), token.Character(char))
        Error(_) -> #(error(lexer, UnterminatedCharacter), token.Character(""))
      }
  }
}
243
243
+
244
244
+
/// Lexes the body of an escape sequence. The caller has already consumed the
/// backslash, and the returned text does not include it.
///
/// Recognised forms, tried in this order:
/// - control escapes: `^` followed by a letter or one of `@ [ \ ] ^ _ ?`;
/// - `x{...}`, delegated to `lex_brace_escape_sequence`;
/// - `x` followed by hex digits, delegated to `lex_hex_escape_sequence`;
/// - an octal digit, delegated to `lex_octal_escape_sequence`;
/// - any other single grapheme, returned as-is.
///
/// If the input ends right after the backslash, `UnterminatedEscapeSequence`
/// is passed to `error` and an empty string is returned.
fn lex_escape_sequence(lexer: Lexer) -> #(Lexer, String) {
  case lexer.source {
    "^a" as sequence <> source
    | "^b" as sequence <> source
    | "^c" as sequence <> source
    | "^d" as sequence <> source
    | "^e" as sequence <> source
    | "^f" as sequence <> source
    | "^g" as sequence <> source
    | "^h" as sequence <> source
    | "^i" as sequence <> source
    | "^j" as sequence <> source
    | "^k" as sequence <> source
    | "^l" as sequence <> source
    | "^m" as sequence <> source
    | "^n" as sequence <> source
    | "^o" as sequence <> source
    | "^p" as sequence <> source
    | "^q" as sequence <> source
    | "^r" as sequence <> source
    | "^s" as sequence <> source
    | "^t" as sequence <> source
    | "^u" as sequence <> source
    | "^v" as sequence <> source
    | "^w" as sequence <> source
    | "^x" as sequence <> source
    | "^y" as sequence <> source
    | "^z" as sequence <> source
    | "^A" as sequence <> source
    | "^B" as sequence <> source
    | "^C" as sequence <> source
    | "^D" as sequence <> source
    | "^E" as sequence <> source
    | "^F" as sequence <> source
    | "^G" as sequence <> source
    | "^H" as sequence <> source
    | "^I" as sequence <> source
    | "^J" as sequence <> source
    | "^K" as sequence <> source
    | "^L" as sequence <> source
    | "^M" as sequence <> source
    | "^N" as sequence <> source
    | "^O" as sequence <> source
    | "^P" as sequence <> source
    | "^Q" as sequence <> source
    | "^R" as sequence <> source
    | "^S" as sequence <> source
    | "^T" as sequence <> source
    | "^U" as sequence <> source
    | "^V" as sequence <> source
    | "^W" as sequence <> source
    | "^X" as sequence <> source
    | "^Y" as sequence <> source
    | "^Z" as sequence <> source
    | "^@" as sequence <> source
    | "^[" as sequence <> source
    | "^\\" as sequence <> source
    | "^]" as sequence <> source
    | "^^" as sequence <> source
    | "^_" as sequence <> source
    | "^?" as sequence <> source -> #(advance(lexer, source), sequence)

    // The braced form must be tried before the bare `x` form, otherwise the
    // `{` would be handed to the hex-digit lexer.
    "x{" <> source -> lex_brace_escape_sequence(advance(lexer, source))
    "x" <> source -> lex_hex_escape_sequence(advance(lexer, source))

    "0" as char <> source
    | "1" as char <> source
    | "2" as char <> source
    | "3" as char <> source
    | "4" as char <> source
    | "5" as char <> source
    | "6" as char <> source
    | "7" as char <> source ->
      lex_octal_escape_sequence(advance(lexer, source), char)

    _ ->
      case string.pop_grapheme(lexer.source) {
        Error(_) -> #(error(lexer, UnterminatedEscapeSequence), "")
        // `string.pop_grapheme` yields `#(first_grapheme, rest)`: the escaped
        // character comes first, the remaining source second.
        Ok(#(char, rest)) -> #(advance(lexer, rest), char)
      }
  }
}
326
326
+
327
327
+
/// Finishes an octal escape sequence whose first digit, `first`, has already
/// been consumed by the caller.
///
/// Up to two further octal digits are taken. The result is the lexer
/// positioned after the last digit taken and the one to three digits
/// joined together. This function never reports an error.
fn lex_octal_escape_sequence(lexer: Lexer, first: String) -> #(Lexer, String) {
  case extract_octal_digit(lexer) {
    Ok(#(after_second, second)) -> {
      let two_digits = first <> second
      case extract_octal_digit(after_second) {
        Ok(#(after_third, third)) -> #(after_third, two_digits <> third)
        // Only two digits present: stop after the second one.
        Error(_) -> #(after_second, two_digits)
      }
    }
    // No further digit: the sequence is just the first digit.
    Error(_) -> #(lexer, first)
  }
}
337
337
+
338
338
+
/// Tries to take one octal digit (`0`-`7`) from the front of the source.
///
/// Returns `Ok` with the advanced lexer and the digit when the source starts
/// with one, and `Error(Nil)` otherwise, leaving the lexer untouched.
fn extract_octal_digit(lexer: Lexer) -> Result(#(Lexer, String), Nil) {
  case lexer.source {
    "0" as digit <> rest
    | "1" as digit <> rest
    | "2" as digit <> rest
    | "3" as digit <> rest
    | "4" as digit <> rest
    | "5" as digit <> rest
    | "6" as digit <> rest
    | "7" as digit <> rest -> Ok(#(advance(lexer, rest), digit))
    _ -> Error(Nil)
  }
}
351
351
+
352
352
+
/// Lexes the two hex digits of an `xHH` escape sequence. The caller has
/// already consumed the `x`.
///
/// On success the returned text is `"x"` followed by both digits. If either
/// digit is missing, `UnterminatedEscapeSequence` is passed to `error` on the
/// lexer at the point where the digit was expected, and the text lexed so far
/// (`"x"` plus any digit already taken) is returned.
fn lex_hex_escape_sequence(lexer: Lexer) -> #(Lexer, String) {
  case extract_hex_digit(lexer) {
    Ok(#(after_first, high)) ->
      case extract_hex_digit(after_first) {
        Ok(#(after_second, low)) -> #(after_second, "x" <> high <> low)
        // One digit was consumed; report from the position after it.
        Error(_) -> #(
          error(after_first, UnterminatedEscapeSequence),
          "x" <> high,
        )
      }
    Error(_) -> #(error(lexer, UnterminatedEscapeSequence), "x")
  }
}
362
362
+
363
363
+
/// Tries to take one hexadecimal digit (`0`-`9`, `a`-`f`, `A`-`F`) from the
/// front of the source.
///
/// Returns `Ok` with the advanced lexer and the digit when the source starts
/// with one, and `Error(Nil)` otherwise, leaving the lexer untouched.
fn extract_hex_digit(lexer: Lexer) -> Result(#(Lexer, String), Nil) {
  case lexer.source {
    "0" as digit <> rest
    | "1" as digit <> rest
    | "2" as digit <> rest
    | "3" as digit <> rest
    | "4" as digit <> rest
    | "5" as digit <> rest
    | "6" as digit <> rest
    | "7" as digit <> rest
    | "8" as digit <> rest
    | "9" as digit <> rest
    | "a" as digit <> rest
    | "b" as digit <> rest
    | "c" as digit <> rest
    | "d" as digit <> rest
    | "e" as digit <> rest
    | "f" as digit <> rest
    | "A" as digit <> rest
    | "B" as digit <> rest
    | "C" as digit <> rest
    | "D" as digit <> rest
    | "E" as digit <> rest
    | "F" as digit <> rest -> Ok(#(advance(lexer, rest), digit))
    _ -> Error(Nil)
  }
}
390
390
+
391
391
+
/// Lexes the rest of an `x{...}` escape sequence. The caller has already
/// consumed the `x{`.
///
/// The source is split with the `brace_escape_sequence` splitter. When the
/// delimiter found is `}`, the sequence is complete and the returned text is
/// `"x{" <> contents <> "}"`. For any other split result,
/// `UnterminatedEscapeSequence` is passed to `error`; the delimiter is left in
/// the source for the caller to lex next, and only `"x{" <> contents` is
/// returned. The contents are not checked for being hex digits.
fn lex_brace_escape_sequence(lexer: Lexer) -> #(Lexer, String) {
  let #(contents, delimiter, rest) =
    splitter.split(lexer.splitters.brace_escape_sequence, lexer.source)

  case delimiter {
    "}" -> #(advance(lexer, rest), "x{" <> contents <> "}")
    _ -> #(
      advance(error(lexer, UnterminatedEscapeSequence), delimiter <> rest),
      "x{" <> contents,
    )
  }
}
222
400
+2
-2
src/pearl/token.gleam
···
6
6
ModuleComment(String)
7
7
EndOfFile
8
8
9
9
-
Char(String)
9
9
+
Character(String)
10
10
Integer(String)
11
11
Float(String)
12
12
Atom(name: String, quoted: Bool)
···
107
107
ModuleComment(contents) -> "%%%" <> contents
108
108
EndOfFile -> ""
109
109
110
110
-
Char(char) -> "$" <> char
110
110
+
Character(char) -> "$" <> char
111
111
Integer(int) -> int
112
112
Float(float) -> float
113
113
Atom(name:, quoted: True) -> "'" <> name <> "'"