tangled
alpha
login
or
join now
gearsco.de
/
pearl
2
fork
atom
An Erlang lexer and syntax highlighter in Gleam
2
fork
atom
overview
issues
pulls
pipelines
Lex floats
gearsco.de
11 months ago
65b5b9f1
323fc719
+102
-41
1 changed file
expand all
collapse all
unified
split
src
pearl.gleam
+102
-41
src/pearl.gleam
···
27
27
UnterminatedString
28
28
UnterminatedAtom
29
29
InvalidRadix(radix: String)
30
30
+
NumericSeparatorNotAllowed
31
31
+
ExpectedExponent
32
32
+
NumbersCannotEndAfterRadix
30
33
}
31
34
32
35
pub fn new(source: String) -> Lexer {
···
200
203
| "6" as char <> source
201
204
| "7" as char <> source
202
205
| "8" as char <> source
203
203
-
| "9" as char <> source -> lex_number(advance(lexer, source), char, Initial)
206
206
+
| "9" as char <> source ->
207
207
+
lex_number(advance(lexer, source), char, Initial, AfterNumber)
204
208
205
209
"\"" <> source -> lex_string(advance(lexer, source), "")
206
210
"'" <> source -> lex_quoted_atom(advance(lexer, source), "")
···
219
223
type LexNumberMode {
220
224
Initial
221
225
Radix(Int)
226
226
+
Decimal
227
227
+
Exponent
228
228
+
}
229
229
+
230
230
+
type DelimitedPosition {
231
231
+
AfterDecimal
232
232
+
AfterNumber
233
233
+
AfterSeparator
234
234
+
AfterExponent
235
235
+
AfterRadix
222
236
}
223
237
224
238
fn lex_number(
225
239
lexer: Lexer,
226
240
lexed: String,
227
241
mode: LexNumberMode,
242
242
+
position: DelimitedPosition,
228
243
) -> #(Lexer, Token) {
229
244
let radix = case mode {
230
230
-
Initial -> 10
231
245
Radix(r) -> r
246
246
+
Initial | Decimal | Exponent -> 10
232
247
}
233
248
234
249
case lexer.source {
235
250
"0" as char <> source | "1" as char <> source ->
236
236
-
lex_number(advance(lexer, source), lexed <> char, mode)
251
251
+
lex_number(advance(lexer, source), lexed <> char, mode, AfterNumber)
237
252
"2" as char <> source if radix >= 3 ->
238
238
-
lex_number(advance(lexer, source), lexed <> char, mode)
253
253
+
lex_number(advance(lexer, source), lexed <> char, mode, AfterNumber)
239
254
"3" as char <> source if radix >= 4 ->
240
240
-
lex_number(advance(lexer, source), lexed <> char, mode)
255
255
+
lex_number(advance(lexer, source), lexed <> char, mode, AfterNumber)
241
256
"4" as char <> source if radix >= 5 ->
242
242
-
lex_number(advance(lexer, source), lexed <> char, mode)
257
257
+
lex_number(advance(lexer, source), lexed <> char, mode, AfterNumber)
243
258
"5" as char <> source if radix >= 6 ->
244
244
-
lex_number(advance(lexer, source), lexed <> char, mode)
259
259
+
lex_number(advance(lexer, source), lexed <> char, mode, AfterNumber)
245
260
"6" as char <> source if radix >= 7 ->
246
246
-
lex_number(advance(lexer, source), lexed <> char, mode)
261
261
+
lex_number(advance(lexer, source), lexed <> char, mode, AfterNumber)
247
262
"7" as char <> source if radix >= 8 ->
248
248
-
lex_number(advance(lexer, source), lexed <> char, mode)
263
263
+
lex_number(advance(lexer, source), lexed <> char, mode, AfterNumber)
249
264
"8" as char <> source if radix >= 9 ->
250
250
-
lex_number(advance(lexer, source), lexed <> char, mode)
265
265
+
lex_number(advance(lexer, source), lexed <> char, mode, AfterNumber)
251
266
"9" as char <> source if radix >= 10 ->
252
252
-
lex_number(advance(lexer, source), lexed <> char, mode)
267
267
+
lex_number(advance(lexer, source), lexed <> char, mode, AfterNumber)
253
268
"a" as char <> source | "A" as char <> source if radix >= 11 ->
254
254
-
lex_number(advance(lexer, source), lexed <> char, mode)
269
269
+
lex_number(advance(lexer, source), lexed <> char, mode, AfterNumber)
255
270
"b" as char <> source | "B" as char <> source if radix >= 12 ->
256
256
-
lex_number(advance(lexer, source), lexed <> char, mode)
271
271
+
lex_number(advance(lexer, source), lexed <> char, mode, AfterNumber)
257
272
"c" as char <> source | "C" as char <> source if radix >= 13 ->
258
258
-
lex_number(advance(lexer, source), lexed <> char, mode)
273
273
+
lex_number(advance(lexer, source), lexed <> char, mode, AfterNumber)
259
274
"d" as char <> source | "D" as char <> source if radix >= 14 ->
260
260
-
lex_number(advance(lexer, source), lexed <> char, mode)
275
275
+
lex_number(advance(lexer, source), lexed <> char, mode, AfterNumber)
261
276
"e" as char <> source | "E" as char <> source if radix >= 15 ->
262
262
-
lex_number(advance(lexer, source), lexed <> char, mode)
277
277
+
lex_number(advance(lexer, source), lexed <> char, mode, AfterNumber)
263
278
"f" as char <> source | "F" as char <> source if radix >= 16 ->
264
264
-
lex_number(advance(lexer, source), lexed <> char, mode)
279
279
+
lex_number(advance(lexer, source), lexed <> char, mode, AfterNumber)
265
280
"g" as char <> source | "G" as char <> source if radix >= 17 ->
266
266
-
lex_number(advance(lexer, source), lexed <> char, mode)
281
281
+
lex_number(advance(lexer, source), lexed <> char, mode, AfterNumber)
267
282
"h" as char <> source | "H" as char <> source if radix >= 18 ->
268
268
-
lex_number(advance(lexer, source), lexed <> char, mode)
283
283
+
lex_number(advance(lexer, source), lexed <> char, mode, AfterNumber)
269
284
"i" as char <> source | "I" as char <> source if radix >= 19 ->
270
270
-
lex_number(advance(lexer, source), lexed <> char, mode)
285
285
+
lex_number(advance(lexer, source), lexed <> char, mode, AfterNumber)
271
286
"j" as char <> source | "J" as char <> source if radix >= 20 ->
272
272
-
lex_number(advance(lexer, source), lexed <> char, mode)
287
287
+
lex_number(advance(lexer, source), lexed <> char, mode, AfterNumber)
273
288
"k" as char <> source | "K" as char <> source if radix >= 21 ->
274
274
-
lex_number(advance(lexer, source), lexed <> char, mode)
289
289
+
lex_number(advance(lexer, source), lexed <> char, mode, AfterNumber)
275
290
"l" as char <> source | "L" as char <> source if radix >= 22 ->
276
276
-
lex_number(advance(lexer, source), lexed <> char, mode)
291
291
+
lex_number(advance(lexer, source), lexed <> char, mode, AfterNumber)
277
292
"m" as char <> source | "M" as char <> source if radix >= 23 ->
278
278
-
lex_number(advance(lexer, source), lexed <> char, mode)
293
293
+
lex_number(advance(lexer, source), lexed <> char, mode, AfterNumber)
279
294
"n" as char <> source | "N" as char <> source if radix >= 24 ->
280
280
-
lex_number(advance(lexer, source), lexed <> char, mode)
295
295
+
lex_number(advance(lexer, source), lexed <> char, mode, AfterNumber)
281
296
"o" as char <> source | "O" as char <> source if radix >= 25 ->
282
282
-
lex_number(advance(lexer, source), lexed <> char, mode)
297
297
+
lex_number(advance(lexer, source), lexed <> char, mode, AfterNumber)
283
298
"p" as char <> source | "P" as char <> source if radix >= 26 ->
284
284
-
lex_number(advance(lexer, source), lexed <> char, mode)
299
299
+
lex_number(advance(lexer, source), lexed <> char, mode, AfterNumber)
285
300
"q" as char <> source | "Q" as char <> source if radix >= 27 ->
286
286
-
lex_number(advance(lexer, source), lexed <> char, mode)
301
301
+
lex_number(advance(lexer, source), lexed <> char, mode, AfterNumber)
287
302
"r" as char <> source | "R" as char <> source if radix >= 28 ->
288
288
-
lex_number(advance(lexer, source), lexed <> char, mode)
303
303
+
lex_number(advance(lexer, source), lexed <> char, mode, AfterNumber)
289
304
"s" as char <> source | "S" as char <> source if radix >= 29 ->
290
290
-
lex_number(advance(lexer, source), lexed <> char, mode)
305
305
+
lex_number(advance(lexer, source), lexed <> char, mode, AfterNumber)
291
306
"t" as char <> source | "T" as char <> source if radix >= 30 ->
292
292
-
lex_number(advance(lexer, source), lexed <> char, mode)
307
307
+
lex_number(advance(lexer, source), lexed <> char, mode, AfterNumber)
293
308
"u" as char <> source | "U" as char <> source if radix >= 31 ->
294
294
-
lex_number(advance(lexer, source), lexed <> char, mode)
309
309
+
lex_number(advance(lexer, source), lexed <> char, mode, AfterNumber)
295
310
"v" as char <> source | "V" as char <> source if radix >= 32 ->
296
296
-
lex_number(advance(lexer, source), lexed <> char, mode)
311
311
+
lex_number(advance(lexer, source), lexed <> char, mode, AfterNumber)
297
312
"w" as char <> source | "W" as char <> source if radix >= 33 ->
298
298
-
lex_number(advance(lexer, source), lexed <> char, mode)
313
313
+
lex_number(advance(lexer, source), lexed <> char, mode, AfterNumber)
299
314
"x" as char <> source | "X" as char <> source if radix >= 34 ->
300
300
-
lex_number(advance(lexer, source), lexed <> char, mode)
315
315
+
lex_number(advance(lexer, source), lexed <> char, mode, AfterNumber)
301
316
"y" as char <> source | "Y" as char <> source if radix >= 35 ->
302
302
-
lex_number(advance(lexer, source), lexed <> char, mode)
317
317
+
lex_number(advance(lexer, source), lexed <> char, mode, AfterNumber)
303
318
"z" as char <> source | "Z" as char <> source if radix >= 36 ->
304
304
-
lex_number(advance(lexer, source), lexed <> char, mode)
319
319
+
lex_number(advance(lexer, source), lexed <> char, mode, AfterNumber)
305
320
306
306
-
"#" <> source if mode == Initial ->
307
307
-
case int.parse(lexed) {
321
321
+
"#" <> source if mode == Initial && position == AfterNumber ->
322
322
+
case int.parse(string.replace(in: lexed, each: "_", with: "")) {
308
323
Error(_) -> #(
309
324
error(advance(lexer, source), InvalidRadix(lexed)),
310
325
token.Integer(lexed),
···
314
329
token.Integer(lexed),
315
330
)
316
331
Ok(radix) ->
317
317
-
lex_number(advance(lexer, source), lexed <> "#", Radix(radix))
332
332
+
lex_number(
333
333
+
advance(lexer, source),
334
334
+
lexed <> "#",
335
335
+
Radix(radix),
336
336
+
AfterRadix,
337
337
+
)
318
338
}
319
339
320
320
-
_ -> #(lexer, token.Integer(lexed))
340
340
+
"_" <> source if position == AfterNumber ->
341
341
+
lex_number(advance(lexer, source), lexed <> "_", mode, AfterSeparator)
342
342
+
343
343
+
"_" <> _ -> #(
344
344
+
error(lexer, NumericSeparatorNotAllowed),
345
345
+
token.Integer(lexed),
346
346
+
)
347
347
+
348
348
+
"." <> source if mode == Initial && position == AfterNumber ->
349
349
+
lex_number(advance(lexer, source), lexed <> ".", Decimal, AfterDecimal)
350
350
+
351
351
+
"e-" as prefix <> source
352
352
+
| "e" as prefix <> source
353
353
+
| "E-" as prefix <> source
354
354
+
| "E" as prefix <> source
355
355
+
if mode == Decimal && position == AfterNumber
356
356
+
->
357
357
+
lex_number(
358
358
+
advance(lexer, source),
359
359
+
lexed <> prefix,
360
360
+
Exponent,
361
361
+
AfterExponent,
362
362
+
)
363
363
+
364
364
+
_ -> {
365
365
+
let token = case mode {
366
366
+
Decimal | Exponent -> token.Float(lexed)
367
367
+
Initial | Radix(_) -> token.Integer(lexed)
368
368
+
}
369
369
+
case position {
370
370
+
// If we have some code that looks like `15.`, that is valid syntax,
371
371
+
// but it's an integer followed by a dot, not a float.
372
372
+
AfterDecimal -> #(
373
373
+
advance(lexer, "." <> lexer.source),
374
374
+
token.Integer(string.drop_end(lexed, 1)),
375
375
+
)
376
376
+
AfterExponent -> #(error(lexer, ExpectedExponent), token)
377
377
+
AfterRadix -> #(error(lexer, NumbersCannotEndAfterRadix), token)
378
378
+
AfterNumber -> #(lexer, token)
379
379
+
AfterSeparator -> #(error(lexer, NumericSeparatorNotAllowed), token)
380
380
+
}
381
381
+
}
321
382
}
322
383
}
323
384