An Erlang lexer and syntax highlighter in Gleam

Lex sigils

+155 -2
+115
src/pearl.gleam
···
 20  20       string: Splitter,
 21  21       quoted_atom: Splitter,
 22  22       brace_escape_sequence: Splitter,
     23 +     sigil: Splitter,
     24 +     sigil_verbatim: Splitter,
 23  25     )
 24  26   }
 25  27
···
 33  35     NumbersCannotEndAfterRadix
 34  36     UnterminatedCharacter
 35  37     UnterminatedEscapeSequence
     38 +   ExpectedSigilDelimiter
 36  39   }
 37  40
 38  41   pub fn new(source: String) -> Lexer {
···
 51  54       string: splitter.new(["\"", "\\"]),
 52  55       quoted_atom: splitter.new(["'", "\\"]),
 53  56       brace_escape_sequence: splitter.new(["}", "\n", "\r"]),
     57 +     sigil: splitter.new([
     58 +       ")", "]", "}", ">", "/", "|", "'", "\"", "`", "#", "\\",
     59 +     ]),
     60 +     sigil_verbatim: splitter.new([
     61 +       ")", "]", "}", ">", "/", "|", "'", "\"", "`", "#",
     62 +     ]),
 54  63     )
 55  64   }
 56  65
···
214 223       "'" <> source -> lex_quoted_atom(advance(lexer, source), "")
215 224
216 225       "$" <> source -> lex_character(advance(lexer, source))
    226 +
    227 +     "~" <> source -> lex_sigil(advance(lexer, source))
217 228
218 229       _ ->
219 230         case string.pop_grapheme(lexer.source) {
···
557 568           AfterSeparator -> #(error(lexer, NumericSeparatorNotAllowed), token)
558 569         }
559 570       }
    571 +   }
    572 + }
    573 +
    574 + fn lex_sigil(lexer: Lexer) -> #(Lexer, Token) {
    575 +   let #(lexer, sigil, verbatim) = case lexer.source {
    576 +     "b" as sigil <> source | "s" as sigil <> source -> #(
    577 +       advance(lexer, source),
    578 +       sigil,
    579 +       False,
    580 +     )
    581 +
    582 +     "B" as sigil <> source | "S" as sigil <> source -> #(
    583 +       advance(lexer, source),
    584 +       sigil,
    585 +       True,
    586 +     )
    587 +     _ -> #(lexer, "", False)
    588 +   }
    589 +
    590 +   let #(lexer, delimiter, closing_char) = case lexer.source {
    591 +     "(" <> source -> #(advance(lexer, source), token.SigilParen, ")")
    592 +     "[" <> source -> #(advance(lexer, source), token.SigilSquare, "]")
    593 +     "{" <> source -> #(advance(lexer, source), token.SigilBrace, "}")
    594 +     "<" <> source -> #(advance(lexer, source), token.SigilAngle, ">")
    595 +
    596 +     "/" <> source -> #(advance(lexer, source), token.SigilSlash, "/")
    597 +     "|" <> source -> #(advance(lexer, source), token.SigilPipe, "|")
    598 +     "'" <> source -> #(advance(lexer, source), token.SigilSingleQuote, "'")
    599 +     "\"" <> source -> #(advance(lexer, source), token.SigilDoubleQuote, "\"")
    600 +     "`" <> source -> #(advance(lexer, source), token.SigilBacktick, "`")
    601 +     "#" <> source -> #(advance(lexer, source), token.SigilHash, "#")
    602 +
    603 +     _ -> #(error(lexer, ExpectedSigilDelimiter), token.SigilNone, "")
    604 +   }
    605 +
    606 +   case delimiter {
    607 +     token.SigilNone -> #(
    608 +       lexer,
    609 +       token.UnterminatedSigil(sigil:, delimiter:, contents: ""),
    610 +     )
    611 +     _ -> {
    612 +       let splitter = case verbatim {
    613 +         False -> lexer.splitters.sigil
    614 +         True -> lexer.splitters.sigil_verbatim
    615 +       }
    616 +
    617 +       do_lex_sigil(lexer, sigil, delimiter, closing_char, splitter, "")
    618 +     }
    619 +   }
    620 + }
    621 +
    622 + fn do_lex_sigil(
    623 +   lexer: Lexer,
    624 +   sigil: String,
    625 +   delimiter: token.SigilDelimiter,
    626 +   closing_char: String,
    627 +   splitter: Splitter,
    628 +   contents: String,
    629 + ) -> #(Lexer, Token) {
    630 +   let #(before, split, after) = splitter.split(splitter, lexer.source)
    631 +   case split {
    632 +     "" -> #(
    633 +       error(advance(lexer, after), UnterminatedString),
    634 +       token.UnterminatedSigil(sigil:, delimiter:, contents: contents <> before),
    635 +     )
    636 +
    637 +     "\\" ->
    638 +       case string.pop_grapheme(after) {
    639 +         Error(_) -> #(
    640 +           error(advance(lexer, after), UnterminatedString),
    641 +           token.UnterminatedSigil(
    642 +             sigil:,
    643 +             delimiter:,
    644 +             contents: contents <> before <> "\\",
    645 +           ),
    646 +         )
    647 +         Ok(#(character, source)) ->
    648 +           do_lex_sigil(
    649 +             advance(lexer, source),
    650 +             sigil,
    651 +             delimiter,
    652 +             closing_char,
    653 +             splitter,
    654 +             contents <> before <> "\\" <> character,
    655 +           )
    656 +       }
    657 +
    658 +     _ if split == closing_char -> #(
    659 +       advance(lexer, after),
    660 +       token.Sigil(sigil:, delimiter:, contents: contents <> before),
    661 +     )
    662 +
    663 +     // Here, we've split on a delimiter which doesn't match the current sigil.
    664 +     // In this case, we must continue lexing until we find a delimiter of the
    665 +     // correct kind.
    666 +     _ ->
    667 +       do_lex_sigil(
    668 +         advance(lexer, after),
    669 +         sigil,
    670 +         delimiter,
    671 +         closing_char,
    672 +         splitter,
    673 +         contents <> before <> split,
    674 +       )
560 675     }
561 676   }
562 677
+40 -2
src/pearl/token.gleam
···
 12  12     Atom(name: String, quoted: Bool)
 13  13     String(String)
 14  14     TripleQuotedString(contents: String, end_indentation: String)
 15     -   Sigil(sigil: String, contents: String)
     15 +   Sigil(sigil: String, delimiter: SigilDelimiter, contents: String)
 16  16     Variable(String)
 17  17
 18  18     // Keywords
···
 95  95     // Invalid tokens
 96  96     Unknown(String)
 97  97     UnterminatedString(String)
     98 +   UnterminatedSigil(sigil: String, delimiter: SigilDelimiter, contents: String)
 98  99     UnterminatedAtom(String)
 99 100   }
100 101
···
115 116       String(contents) -> "\"" <> contents <> "\""
116 117       TripleQuotedString(contents:, end_indentation:) ->
117 118         "\"\"\"\n" <> contents <> "\n" <> end_indentation <> "\"\"\""
118     -     Sigil(sigil:, contents:) -> "~" <> sigil <> "\"" <> contents <> "\""
    119 +     Sigil(sigil:, delimiter:, contents:) -> {
    120 +       let #(opening, closing) = sigil_delimiters(delimiter)
    121 +       "~" <> sigil <> opening <> contents <> closing
    122 +     }
119 123       Variable(name) -> name
120 124
121 125       // Keywords
···
198 202       // Invalid tokens
199 203       Unknown(char) -> char
200 204       UnterminatedString(contents) -> "\"" <> contents
    205 +     UnterminatedSigil(sigil:, contents:, delimiter:) -> {
    206 +       let #(opening, _closing) = sigil_delimiters(delimiter)
    207 +       "~" <> sigil <> opening <> contents
    208 +     }
201 209       UnterminatedAtom(contents) -> "'" <> contents
202 210     }
203 211   }
    212 +
    213 + pub type SigilDelimiter {
    214 +   SigilNone
    215 +   SigilParen
    216 +   SigilSquare
    217 +   SigilBrace
    218 +   SigilAngle
    219 +   SigilSlash
    220 +   SigilPipe
    221 +   SigilSingleQuote
    222 +   SigilDoubleQuote
    223 +   SigilBacktick
    224 +   SigilHash
    225 + }
    226 +
    227 + pub fn sigil_delimiters(delimiter: SigilDelimiter) -> #(String, String) {
    228 +   case delimiter {
    229 +     SigilNone -> #("", "")
    230 +     SigilAngle -> #("<", ">")
    231 +     SigilBacktick -> #("`", "`")
    232 +     SigilBrace -> #("{", "}")
    233 +     SigilDoubleQuote -> #("\"", "\"")
    234 +     SigilHash -> #("#", "#")
    235 +     SigilParen -> #("(", ")")
    236 +     SigilPipe -> #("|", "|")
    237 +     SigilSingleQuote -> #("'", "'")
    238 +     SigilSlash -> #("/", "/")
    239 +     SigilSquare -> #("[", "]")
    240 +   }
    241 + }