An Erlang lexer and syntax highlighter in Gleam

Lex floats

+102 -41
src/pearl.gleam
··· 27 27 UnterminatedString 28 28 UnterminatedAtom 29 29 InvalidRadix(radix: String) 30 + NumericSeparatorNotAllowed 31 + ExpectedExponent 32 + NumbersCannotEndAfterRadix 30 33 } 31 34 32 35 pub fn new(source: String) -> Lexer { ··· 200 203 | "6" as char <> source 201 204 | "7" as char <> source 202 205 | "8" as char <> source 203 - | "9" as char <> source -> lex_number(advance(lexer, source), char, Initial) 206 + | "9" as char <> source -> 207 + lex_number(advance(lexer, source), char, Initial, AfterNumber) 204 208 205 209 "\"" <> source -> lex_string(advance(lexer, source), "") 206 210 "'" <> source -> lex_quoted_atom(advance(lexer, source), "") ··· 219 223 type LexNumberMode { 220 224 Initial 221 225 Radix(Int) 226 + Decimal 227 + Exponent 228 + } 229 + 230 + type DelimitedPosition { 231 + AfterDecimal 232 + AfterNumber 233 + AfterSeparator 234 + AfterExponent 235 + AfterRadix 222 236 } 223 237 224 238 fn lex_number( 225 239 lexer: Lexer, 226 240 lexed: String, 227 241 mode: LexNumberMode, 242 + position: DelimitedPosition, 228 243 ) -> #(Lexer, Token) { 229 244 let radix = case mode { 230 - Initial -> 10 231 245 Radix(r) -> r 246 + Initial | Decimal | Exponent -> 10 232 247 } 233 248 234 249 case lexer.source { 235 250 "0" as char <> source | "1" as char <> source -> 236 - lex_number(advance(lexer, source), lexed <> char, mode) 251 + lex_number(advance(lexer, source), lexed <> char, mode, AfterNumber) 237 252 "2" as char <> source if radix >= 3 -> 238 - lex_number(advance(lexer, source), lexed <> char, mode) 253 + lex_number(advance(lexer, source), lexed <> char, mode, AfterNumber) 239 254 "3" as char <> source if radix >= 4 -> 240 - lex_number(advance(lexer, source), lexed <> char, mode) 255 + lex_number(advance(lexer, source), lexed <> char, mode, AfterNumber) 241 256 "4" as char <> source if radix >= 5 -> 242 - lex_number(advance(lexer, source), lexed <> char, mode) 257 + lex_number(advance(lexer, source), lexed <> char, mode, AfterNumber) 243 258 "5" as 
char <> source if radix >= 6 -> 244 - lex_number(advance(lexer, source), lexed <> char, mode) 259 + lex_number(advance(lexer, source), lexed <> char, mode, AfterNumber) 245 260 "6" as char <> source if radix >= 7 -> 246 - lex_number(advance(lexer, source), lexed <> char, mode) 261 + lex_number(advance(lexer, source), lexed <> char, mode, AfterNumber) 247 262 "7" as char <> source if radix >= 8 -> 248 - lex_number(advance(lexer, source), lexed <> char, mode) 263 + lex_number(advance(lexer, source), lexed <> char, mode, AfterNumber) 249 264 "8" as char <> source if radix >= 9 -> 250 - lex_number(advance(lexer, source), lexed <> char, mode) 265 + lex_number(advance(lexer, source), lexed <> char, mode, AfterNumber) 251 266 "9" as char <> source if radix >= 10 -> 252 - lex_number(advance(lexer, source), lexed <> char, mode) 267 + lex_number(advance(lexer, source), lexed <> char, mode, AfterNumber) 253 268 "a" as char <> source | "A" as char <> source if radix >= 11 -> 254 - lex_number(advance(lexer, source), lexed <> char, mode) 269 + lex_number(advance(lexer, source), lexed <> char, mode, AfterNumber) 255 270 "b" as char <> source | "B" as char <> source if radix >= 12 -> 256 - lex_number(advance(lexer, source), lexed <> char, mode) 271 + lex_number(advance(lexer, source), lexed <> char, mode, AfterNumber) 257 272 "c" as char <> source | "C" as char <> source if radix >= 13 -> 258 - lex_number(advance(lexer, source), lexed <> char, mode) 273 + lex_number(advance(lexer, source), lexed <> char, mode, AfterNumber) 259 274 "d" as char <> source | "D" as char <> source if radix >= 14 -> 260 - lex_number(advance(lexer, source), lexed <> char, mode) 275 + lex_number(advance(lexer, source), lexed <> char, mode, AfterNumber) 261 276 "e" as char <> source | "E" as char <> source if radix >= 15 -> 262 - lex_number(advance(lexer, source), lexed <> char, mode) 277 + lex_number(advance(lexer, source), lexed <> char, mode, AfterNumber) 263 278 "f" as char <> source | "F" as char <> 
source if radix >= 16 -> 264 - lex_number(advance(lexer, source), lexed <> char, mode) 279 + lex_number(advance(lexer, source), lexed <> char, mode, AfterNumber) 265 280 "g" as char <> source | "G" as char <> source if radix >= 17 -> 266 - lex_number(advance(lexer, source), lexed <> char, mode) 281 + lex_number(advance(lexer, source), lexed <> char, mode, AfterNumber) 267 282 "h" as char <> source | "H" as char <> source if radix >= 18 -> 268 - lex_number(advance(lexer, source), lexed <> char, mode) 283 + lex_number(advance(lexer, source), lexed <> char, mode, AfterNumber) 269 284 "i" as char <> source | "I" as char <> source if radix >= 19 -> 270 - lex_number(advance(lexer, source), lexed <> char, mode) 285 + lex_number(advance(lexer, source), lexed <> char, mode, AfterNumber) 271 286 "j" as char <> source | "J" as char <> source if radix >= 20 -> 272 - lex_number(advance(lexer, source), lexed <> char, mode) 287 + lex_number(advance(lexer, source), lexed <> char, mode, AfterNumber) 273 288 "k" as char <> source | "K" as char <> source if radix >= 21 -> 274 - lex_number(advance(lexer, source), lexed <> char, mode) 289 + lex_number(advance(lexer, source), lexed <> char, mode, AfterNumber) 275 290 "l" as char <> source | "L" as char <> source if radix >= 22 -> 276 - lex_number(advance(lexer, source), lexed <> char, mode) 291 + lex_number(advance(lexer, source), lexed <> char, mode, AfterNumber) 277 292 "m" as char <> source | "M" as char <> source if radix >= 23 -> 278 - lex_number(advance(lexer, source), lexed <> char, mode) 293 + lex_number(advance(lexer, source), lexed <> char, mode, AfterNumber) 279 294 "n" as char <> source | "N" as char <> source if radix >= 24 -> 280 - lex_number(advance(lexer, source), lexed <> char, mode) 295 + lex_number(advance(lexer, source), lexed <> char, mode, AfterNumber) 281 296 "o" as char <> source | "O" as char <> source if radix >= 25 -> 282 - lex_number(advance(lexer, source), lexed <> char, mode) 297 + lex_number(advance(lexer, 
source), lexed <> char, mode, AfterNumber) 283 298 "p" as char <> source | "P" as char <> source if radix >= 26 -> 284 - lex_number(advance(lexer, source), lexed <> char, mode) 299 + lex_number(advance(lexer, source), lexed <> char, mode, AfterNumber) 285 300 "q" as char <> source | "Q" as char <> source if radix >= 27 -> 286 - lex_number(advance(lexer, source), lexed <> char, mode) 301 + lex_number(advance(lexer, source), lexed <> char, mode, AfterNumber) 287 302 "r" as char <> source | "R" as char <> source if radix >= 28 -> 288 - lex_number(advance(lexer, source), lexed <> char, mode) 303 + lex_number(advance(lexer, source), lexed <> char, mode, AfterNumber) 289 304 "s" as char <> source | "S" as char <> source if radix >= 29 -> 290 - lex_number(advance(lexer, source), lexed <> char, mode) 305 + lex_number(advance(lexer, source), lexed <> char, mode, AfterNumber) 291 306 "t" as char <> source | "T" as char <> source if radix >= 30 -> 292 - lex_number(advance(lexer, source), lexed <> char, mode) 307 + lex_number(advance(lexer, source), lexed <> char, mode, AfterNumber) 293 308 "u" as char <> source | "U" as char <> source if radix >= 31 -> 294 - lex_number(advance(lexer, source), lexed <> char, mode) 309 + lex_number(advance(lexer, source), lexed <> char, mode, AfterNumber) 295 310 "v" as char <> source | "V" as char <> source if radix >= 32 -> 296 - lex_number(advance(lexer, source), lexed <> char, mode) 311 + lex_number(advance(lexer, source), lexed <> char, mode, AfterNumber) 297 312 "w" as char <> source | "W" as char <> source if radix >= 33 -> 298 - lex_number(advance(lexer, source), lexed <> char, mode) 313 + lex_number(advance(lexer, source), lexed <> char, mode, AfterNumber) 299 314 "x" as char <> source | "X" as char <> source if radix >= 34 -> 300 - lex_number(advance(lexer, source), lexed <> char, mode) 315 + lex_number(advance(lexer, source), lexed <> char, mode, AfterNumber) 301 316 "y" as char <> source | "Y" as char <> source if radix >= 35 -> 302 
- lex_number(advance(lexer, source), lexed <> char, mode) 317 + lex_number(advance(lexer, source), lexed <> char, mode, AfterNumber) 303 318 "z" as char <> source | "Z" as char <> source if radix >= 36 -> 304 - lex_number(advance(lexer, source), lexed <> char, mode) 319 + lex_number(advance(lexer, source), lexed <> char, mode, AfterNumber) 305 320 306 - "#" <> source if mode == Initial -> 307 - case int.parse(lexed) { 321 + "#" <> source if mode == Initial && position == AfterNumber -> 322 + case int.parse(string.replace(in: lexed, each: "_", with: "")) { 308 323 Error(_) -> #( 309 324 error(advance(lexer, source), InvalidRadix(lexed)), 310 325 token.Integer(lexed), ··· 314 329 token.Integer(lexed), 315 330 ) 316 331 Ok(radix) -> 317 - lex_number(advance(lexer, source), lexed <> "#", Radix(radix)) 332 + lex_number( 333 + advance(lexer, source), 334 + lexed <> "#", 335 + Radix(radix), 336 + AfterRadix, 337 + ) 318 338 } 319 339 320 - _ -> #(lexer, token.Integer(lexed)) 340 + "_" <> source if position == AfterNumber -> 341 + lex_number(advance(lexer, source), lexed <> "_", mode, AfterSeparator) 342 + 343 + "_" <> _ -> #( 344 + error(lexer, NumericSeparatorNotAllowed), 345 + token.Integer(lexed), 346 + ) 347 + 348 + "." <> source if mode == Initial && position == AfterNumber -> 349 + lex_number(advance(lexer, source), lexed <> ".", Decimal, AfterDecimal) 350 + 351 + "e-" as prefix <> source 352 + | "e" as prefix <> source 353 + | "E-" as prefix <> source 354 + | "E" as prefix <> source 355 + if mode == Decimal && position == AfterNumber 356 + -> 357 + lex_number( 358 + advance(lexer, source), 359 + lexed <> prefix, 360 + Exponent, 361 + AfterExponent, 362 + ) 363 + 364 + _ -> { 365 + let token = case mode { 366 + Decimal | Exponent -> token.Float(lexed) 367 + Initial | Radix(_) -> token.Integer(lexed) 368 + } 369 + case position { 370 + // If we have some code that looks like `15.`, that is valid syntax, 371 + // but it's an integer followed by a dot, not a float. 
372 + AfterDecimal -> #( 373 + advance(lexer, "." <> lexer.source), 374 + token.Integer(string.drop_end(lexed, 1)), 375 + ) 376 + AfterExponent -> #(error(lexer, ExpectedExponent), token) 377 + AfterRadix -> #(error(lexer, NumbersCannotEndAfterRadix), token) 378 + AfterNumber -> #(lexer, token) 379 + AfterSeparator -> #(error(lexer, NumericSeparatorNotAllowed), token) 380 + } 381 + } 321 382 } 322 383 } 323 384