tangled
alpha
login
or
join now
gearsco.de
/
pearl
2
fork
atom
An Erlang lexer and syntax highlighter in Gleam
2
fork
atom
overview
issues
pulls
pipelines
Lex sigils
gearsco.de
11 months ago
2fa9f075
50df07b2
+155
-2
2 changed files
expand all
collapse all
unified
split
src
pearl
token.gleam
pearl.gleam
+115
src/pearl.gleam
···
20
20
string: Splitter,
21
21
quoted_atom: Splitter,
22
22
brace_escape_sequence: Splitter,
23
23
+
sigil: Splitter,
24
24
+
sigil_verbatim: Splitter,
23
25
)
24
26
}
25
27
···
33
35
NumbersCannotEndAfterRadix
34
36
UnterminatedCharacter
35
37
UnterminatedEscapeSequence
38
38
+
ExpectedSigilDelimiter
36
39
}
37
40
38
41
pub fn new(source: String) -> Lexer {
···
51
54
string: splitter.new(["\"", "\\"]),
52
55
quoted_atom: splitter.new(["'", "\\"]),
53
56
brace_escape_sequence: splitter.new(["}", "\n", "\r"]),
57
57
+
sigil: splitter.new([
58
58
+
")", "]", "}", ">", "/", "|", "'", "\"", "`", "#", "\\",
59
59
+
]),
60
60
+
sigil_verbatim: splitter.new([
61
61
+
")", "]", "}", ">", "/", "|", "'", "\"", "`", "#",
62
62
+
]),
54
63
)
55
64
}
56
65
···
214
223
"'" <> source -> lex_quoted_atom(advance(lexer, source), "")
215
224
216
225
"$" <> source -> lex_character(advance(lexer, source))
226
226
+
227
227
+
"~" <> source -> lex_sigil(advance(lexer, source))
217
228
218
229
_ ->
219
230
case string.pop_grapheme(lexer.source) {
···
557
568
AfterSeparator -> #(error(lexer, NumericSeparatorNotAllowed), token)
558
569
}
559
570
}
571
571
+
}
572
572
+
}
573
573
+
574
574
+
/// Lexes a sigil. The caller has already consumed the leading `~`.
///
/// A sigil is an optional one-letter type followed by an opening delimiter.
/// The lowercase types `b` and `s` (and the absent type) are lexed with the
/// escape-aware splitter; the uppercase types `B` and `S` are lexed with the
/// verbatim splitter, which does not split on backslashes.
///
/// If no recognised opening delimiter follows, an `ExpectedSigilDelimiter`
/// error is recorded and an `UnterminatedSigil` token with the `SigilNone`
/// delimiter and empty contents is returned.
fn lex_sigil(lexer: Lexer) -> #(Lexer, Token) {
  // Optional sigil type; uppercase letters select verbatim lexing.
  let #(lexer, sigil, verbatim) = case lexer.source {
    "b" <> rest -> #(advance(lexer, rest), "b", False)
    "s" <> rest -> #(advance(lexer, rest), "s", False)
    "B" <> rest -> #(advance(lexer, rest), "B", True)
    "S" <> rest -> #(advance(lexer, rest), "S", True)
    _ -> #(lexer, "", False)
  }

  // Steps past the opening delimiter and lexes the body up to `closing_char`.
  let lex_body = fn(
    rest: String,
    delimiter: token.SigilDelimiter,
    closing_char: String,
  ) {
    let lexer = advance(lexer, rest)
    let splitter = case verbatim {
      True -> lexer.splitters.sigil_verbatim
      False -> lexer.splitters.sigil
    }
    do_lex_sigil(lexer, sigil, delimiter, closing_char, splitter, "")
  }

  case lexer.source {
    // Bracket-style delimiters close with their mirrored character.
    "(" <> rest -> lex_body(rest, token.SigilParen, ")")
    "[" <> rest -> lex_body(rest, token.SigilSquare, "]")
    "{" <> rest -> lex_body(rest, token.SigilBrace, "}")
    "<" <> rest -> lex_body(rest, token.SigilAngle, ">")

    // Symmetric delimiters close with the same character.
    "/" <> rest -> lex_body(rest, token.SigilSlash, "/")
    "|" <> rest -> lex_body(rest, token.SigilPipe, "|")
    "'" <> rest -> lex_body(rest, token.SigilSingleQuote, "'")
    "\"" <> rest -> lex_body(rest, token.SigilDoubleQuote, "\"")
    "`" <> rest -> lex_body(rest, token.SigilBacktick, "`")
    "#" <> rest -> lex_body(rest, token.SigilHash, "#")

    // No valid delimiter: report it and emit an empty unterminated sigil.
    _ -> #(
      error(lexer, ExpectedSigilDelimiter),
      token.UnterminatedSigil(sigil:, delimiter: token.SigilNone, contents: ""),
    )
  }
}
621
621
+
622
622
+
/// Scans the body of a sigil until `closing_char` is found, accumulating the
/// text seen so far in `contents`.
///
/// `splitter` decides where scanning pauses: on any possible closing
/// delimiter and, for the escape-aware splitter, on a backslash.
///
/// Returns a `Sigil` token once the matching closing delimiter is reached.
/// If the input ends first (including directly after a backslash), an
/// `UnterminatedString` error is recorded and an `UnterminatedSigil` token
/// carrying the text scanned so far is returned.
fn do_lex_sigil(
  lexer: Lexer,
  sigil: String,
  delimiter: token.SigilDelimiter,
  closing_char: String,
  splitter: Splitter,
  contents: String,
) -> #(Lexer, Token) {
  let #(chunk, matched, rest) = splitter.split(splitter, lexer.source)
  let scanned = contents <> chunk

  case matched {
    // An empty match is treated as running out of input before the sigil
    // was closed.
    "" -> #(
      error(advance(lexer, rest), UnterminatedString),
      token.UnterminatedSigil(sigil:, delimiter:, contents: scanned),
    )

    // A backslash keeps the following grapheme as part of the contents, so
    // an escaped closing delimiter does not end the sigil.
    "\\" ->
      case string.pop_grapheme(rest) {
        Ok(#(escaped, remaining)) ->
          do_lex_sigil(
            advance(lexer, remaining),
            sigil,
            delimiter,
            closing_char,
            splitter,
            scanned <> "\\" <> escaped,
          )
        Error(_) -> #(
          error(advance(lexer, rest), UnterminatedString),
          token.UnterminatedSigil(
            sigil:,
            delimiter:,
            contents: scanned <> "\\",
          ),
        )
      }

    _ ->
      case matched == closing_char {
        True -> #(
          advance(lexer, rest),
          token.Sigil(sigil:, delimiter:, contents: scanned),
        )

        // The splitter stopped on a delimiter belonging to a different kind
        // of sigil. It is ordinary content here, so keep it and carry on
        // until the correct closing delimiter turns up.
        False ->
          do_lex_sigil(
            advance(lexer, rest),
            sigil,
            delimiter,
            closing_char,
            splitter,
            scanned <> matched,
          )
      }
  }
}
562
677
+40
-2
src/pearl/token.gleam
···
12
12
Atom(name: String, quoted: Bool)
13
13
String(String)
14
14
TripleQuotedString(contents: String, end_indentation: String)
15
15
-
Sigil(sigil: String, contents: String)
15
15
+
Sigil(sigil: String, delimiter: SigilDelimiter, contents: String)
16
16
Variable(String)
17
17
18
18
// Keywords
···
95
95
// Invalid tokens
96
96
Unknown(String)
97
97
UnterminatedString(String)
98
98
+
UnterminatedSigil(sigil: String, delimiter: SigilDelimiter, contents: String)
98
99
UnterminatedAtom(String)
99
100
}
100
101
···
115
116
String(contents) -> "\"" <> contents <> "\""
116
117
TripleQuotedString(contents:, end_indentation:) ->
117
118
"\"\"\"\n" <> contents <> "\n" <> end_indentation <> "\"\"\""
118
118
-
Sigil(sigil:, contents:) -> "~" <> sigil <> "\"" <> contents <> "\""
119
119
+
Sigil(sigil:, delimiter:, contents:) -> {
120
120
+
let #(opening, closing) = sigil_delimiters(delimiter)
121
121
+
"~" <> sigil <> opening <> contents <> closing
122
122
+
}
119
123
Variable(name) -> name
120
124
121
125
// Keywords
···
198
202
// Invalid tokens
199
203
Unknown(char) -> char
200
204
UnterminatedString(contents) -> "\"" <> contents
205
205
+
UnterminatedSigil(sigil:, contents:, delimiter:) -> {
206
206
+
let #(opening, _closing) = sigil_delimiters(delimiter)
207
207
+
"~" <> sigil <> opening <> contents
208
208
+
}
201
209
UnterminatedAtom(contents) -> "'" <> contents
202
210
}
203
211
}
212
212
+
213
213
+
/// The pair of delimiters that encloses the contents of a sigil.
pub type SigilDelimiter {
  /// No delimiter was found after the sigil prefix.
  SigilNone
  /// `(` ... `)`
  SigilParen
  /// `[` ... `]`
  SigilSquare
  /// `{` ... `}`
  SigilBrace
  /// `<` ... `>`
  SigilAngle
  /// `/` ... `/`
  SigilSlash
  /// `|` ... `|`
  SigilPipe
  /// `'` ... `'`
  SigilSingleQuote
  /// `"` ... `"`
  SigilDoubleQuote
  /// `` ` `` ... `` ` ``
  SigilBacktick
  /// `#` ... `#`
  SigilHash
}
226
226
+
227
227
+
/// Returns the `#(opening, closing)` delimiter strings for a sigil delimiter.
///
/// `SigilNone` yields a pair of empty strings.
pub fn sigil_delimiters(delimiter: SigilDelimiter) -> #(String, String) {
  // Delimiters that open and close with the same character.
  let symmetric = fn(char: String) { #(char, char) }

  case delimiter {
    SigilNone -> symmetric("")

    SigilParen -> #("(", ")")
    SigilSquare -> #("[", "]")
    SigilBrace -> #("{", "}")
    SigilAngle -> #("<", ">")

    SigilSlash -> symmetric("/")
    SigilPipe -> symmetric("|")
    SigilSingleQuote -> symmetric("'")
    SigilDoubleQuote -> symmetric("\"")
    SigilBacktick -> symmetric("`")
    SigilHash -> symmetric("#")
  }
}