An Erlang lexer and syntax highlighter in Gleam

Lex integers

+118
+118
src/pearl.gleam
// ···
import gleam/int
import gleam/list
import gleam/string
import pearl/token.{type Token}
// ···
  UnknownCharacter(character: String)
  UnterminatedString
  UnterminatedAtom
  InvalidRadix(radix: String)
}

pub fn new(source: String) -> Lexer {
// ···
    | "Z" as char <> source
    | "_" as char <> source -> lex_variable(advance(lexer, source), char)

    "0" as char <> source
    | "1" as char <> source
    | "2" as char <> source
    | "3" as char <> source
    | "4" as char <> source
    | "5" as char <> source
    | "6" as char <> source
    | "7" as char <> source
    | "8" as char <> source
    | "9" as char <> source -> lex_number(advance(lexer, source), char, Initial)

    "\"" <> source -> lex_string(advance(lexer, source), "")
    "'" <> source -> lex_quoted_atom(advance(lexer, source), "")

// ···
        token.Unknown(char),
      )
    }
  }
}

/// Tracks which part of an integer literal `lex_number` is currently reading.
type LexNumberMode {
  /// No `#` has been seen yet; digits are read as base 10 and a `#` may
  /// still follow to introduce an explicit radix.
  Initial
  /// A `<radix>#` prefix has been read; digits are validated against this
  /// radix.
  Radix(Int)
}

/// Lexes the rest of an integer literal.
///
/// - `lexer`: the lexer positioned just after the text already consumed.
/// - `lexed`: the text of the literal consumed so far (the caller passes the
///   first digit).
/// - `mode`: `Initial` while reading plain decimal digits, `Radix(r)` once a
///   `r#` prefix has been read.
///
/// Returns the lexer positioned after the literal together with a
/// `token.Integer` holding every character that was consumed. When a `#`
/// follows digits that do not form a radix in the range 2..36, an
/// `InvalidRadix` error is recorded on the lexer and lexing of the literal
/// stops there.
fn lex_number(
  lexer: Lexer,
  lexed: String,
  mode: LexNumberMode,
) -> #(Lexer, Token) {
  // Digits are accepted only while their value is below the active radix.
  let radix = case mode {
    Initial -> 10
    Radix(r) -> r
  }

  case lexer.source {
    // `0` and `1` are valid in every supported radix, so they need no guard.
    "0" as char <> source | "1" as char <> source ->
      lex_number(advance(lexer, source), lexed <> char, mode)
    "2" as char <> source if radix >= 3 ->
      lex_number(advance(lexer, source), lexed <> char, mode)
    "3" as char <> source if radix >= 4 ->
      lex_number(advance(lexer, source), lexed <> char, mode)
    "4" as char <> source if radix >= 5 ->
      lex_number(advance(lexer, source), lexed <> char, mode)
    "5" as char <> source if radix >= 6 ->
      lex_number(advance(lexer, source), lexed <> char, mode)
    "6" as char <> source if radix >= 7 ->
      lex_number(advance(lexer, source), lexed <> char, mode)
    "7" as char <> source if radix >= 8 ->
      lex_number(advance(lexer, source), lexed <> char, mode)
    "8" as char <> source if radix >= 9 ->
      lex_number(advance(lexer, source), lexed <> char, mode)
    "9" as char <> source if radix >= 10 ->
      lex_number(advance(lexer, source), lexed <> char, mode)

    // Letters act as digits with values 10..35, in either case.
    "a" as char <> source | "A" as char <> source if radix >= 11 ->
      lex_number(advance(lexer, source), lexed <> char, mode)
    "b" as char <> source | "B" as char <> source if radix >= 12 ->
      lex_number(advance(lexer, source), lexed <> char, mode)
    "c" as char <> source | "C" as char <> source if radix >= 13 ->
      lex_number(advance(lexer, source), lexed <> char, mode)
    "d" as char <> source | "D" as char <> source if radix >= 14 ->
      lex_number(advance(lexer, source), lexed <> char, mode)
    "e" as char <> source | "E" as char <> source if radix >= 15 ->
      lex_number(advance(lexer, source), lexed <> char, mode)
    "f" as char <> source | "F" as char <> source if radix >= 16 ->
      lex_number(advance(lexer, source), lexed <> char, mode)
    "g" as char <> source | "G" as char <> source if radix >= 17 ->
      lex_number(advance(lexer, source), lexed <> char, mode)
    "h" as char <> source | "H" as char <> source if radix >= 18 ->
      lex_number(advance(lexer, source), lexed <> char, mode)
    "i" as char <> source | "I" as char <> source if radix >= 19 ->
      lex_number(advance(lexer, source), lexed <> char, mode)
    "j" as char <> source | "J" as char <> source if radix >= 20 ->
      lex_number(advance(lexer, source), lexed <> char, mode)
    "k" as char <> source | "K" as char <> source if radix >= 21 ->
      lex_number(advance(lexer, source), lexed <> char, mode)
    "l" as char <> source | "L" as char <> source if radix >= 22 ->
      lex_number(advance(lexer, source), lexed <> char, mode)
    "m" as char <> source | "M" as char <> source if radix >= 23 ->
      lex_number(advance(lexer, source), lexed <> char, mode)
    "n" as char <> source | "N" as char <> source if radix >= 24 ->
      lex_number(advance(lexer, source), lexed <> char, mode)
    "o" as char <> source | "O" as char <> source if radix >= 25 ->
      lex_number(advance(lexer, source), lexed <> char, mode)
    "p" as char <> source | "P" as char <> source if radix >= 26 ->
      lex_number(advance(lexer, source), lexed <> char, mode)
    "q" as char <> source | "Q" as char <> source if radix >= 27 ->
      lex_number(advance(lexer, source), lexed <> char, mode)
    "r" as char <> source | "R" as char <> source if radix >= 28 ->
      lex_number(advance(lexer, source), lexed <> char, mode)
    "s" as char <> source | "S" as char <> source if radix >= 29 ->
      lex_number(advance(lexer, source), lexed <> char, mode)
    "t" as char <> source | "T" as char <> source if radix >= 30 ->
      lex_number(advance(lexer, source), lexed <> char, mode)
    "u" as char <> source | "U" as char <> source if radix >= 31 ->
      lex_number(advance(lexer, source), lexed <> char, mode)
    "v" as char <> source | "V" as char <> source if radix >= 32 ->
      lex_number(advance(lexer, source), lexed <> char, mode)
    "w" as char <> source | "W" as char <> source if radix >= 33 ->
      lex_number(advance(lexer, source), lexed <> char, mode)
    "x" as char <> source | "X" as char <> source if radix >= 34 ->
      lex_number(advance(lexer, source), lexed <> char, mode)
    "y" as char <> source | "Y" as char <> source if radix >= 35 ->
      lex_number(advance(lexer, source), lexed <> char, mode)
    "z" as char <> source | "Z" as char <> source if radix >= 36 ->
      lex_number(advance(lexer, source), lexed <> char, mode)

    // A `#` is only meaningful directly after the leading decimal digits;
    // once a radix is set, a further `#` falls through and ends the literal.
    "#" <> source if mode == Initial -> {
      let lexer = advance(lexer, source)
      // The `#` has been consumed, so it must appear in the token text on
      // every path; otherwise the character is lost from the token stream.
      let with_hash = lexed <> "#"

      case int.parse(lexed) {
        Ok(radix) if radix >= 2 && radix <= 36 ->
          lex_number(lexer, with_hash, Radix(radix))
        // Unparseable or out-of-range radix: record the error (reporting
        // only the radix digits) and stop lexing this literal.
        _ -> #(error(lexer, InvalidRadix(lexed)), token.Integer(with_hash))
      }
    }

    // Anything else (including end of input) terminates the literal.
    _ -> #(lexer, token.Integer(lexed))
  }
}