Lark EBNF grammar for a Data Flow assembly graph language
or1-dfasm
// Dataflow Graph Assembly — Lark EBNF Grammar v0.2
// Parser: Earley (required for ambiguity-free resolution of location_dir vs weak_edge)

// Top-level program: optional leading blank/comment lines, then zero or more
// statements. Consecutive statements must be separated by at least one _NL,
// so two statements can no longer share a line (previously a missing comma
// such as `@a @b add <| 1` was silently accepted as a location directive
// followed by a weak edge). An empty file is still valid.
start: _NL* (statement (_NL+ statement)* _NL*)?

// A statement is exactly one of the forms below. The leading "?" inlines this
// rule whenever it has a single child, which is always the case here, so the
// matched form appears directly in the tree without a wrapping "statement" node.
?statement: func_def
          | inst_def
          | strong_edge
          | weak_edge
          | plain_edge
          | data_def
          | location_dir

// --- Function / subgraph definition ---
//   $name |> { body }
// The body follows the same layout rule as the top level: zero or more
// statements, each separated from the next by at least one _NL. Blank and
// comment lines are allowed after "{" and before "}", and a single statement
// may sit on the same line as the braces.
func_def: func_ref FLOW_OUT "{" _NL* (statement (_NL+ statement)* _NL*)? "}"

// --- Instruction definition (named node) ---
//   &label <| opcode [, arg ...]
// The target may be any qualified_ref. Every argument, including the first,
// is introduced by a comma after the opcode.
inst_def: qualified_ref FLOW_IN opcode ("," argument)*

// --- Strong inline edge (internal route, anonymous node) ---
//   opcode input [, input ...] |> output [, output ...]
// At least one input argument and at least one output reference are required.
strong_edge: opcode argument ("," argument)* FLOW_OUT ref_list

// --- Weak inline edge (token output, anonymous node) ---
//   output [, output ...] opcode <| input [, input ...]
// At least one output reference and at least one input argument are required.
weak_edge: ref_list opcode FLOW_IN argument ("," argument)*

// --- Plain edge (wiring between named nodes) ---
//   source |> dest [, dest ...]
// Exactly one source reference fans out to one or more destinations.
plain_edge: qualified_ref FLOW_OUT ref_list

// --- Data / initialisation ---
//   ref = value [, value ...]
//   ref = #macro [arg ...]
data_def: qualified_ref "=" (macro_call | value_list)

// --- Location directive (bare qualified ref, no operator) ---
// Sets location context for subsequent definitions.
location_dir: qualified_ref

// === Shared productions ===

// One or more comma-separated qualified references.
ref_list: qualified_ref ("," qualified_ref)*

// === References ===
// A reference is a sigil-prefixed name followed by an optional qualifier
// chain: at most one placement (|ident), then at most one port (:spec).
//   @name — node reference
//   &name — local label reference
//   $name — function / subgraph reference
// Chaining example: @sum|pe0:L  (placement + port)

qualified_ref: (node_ref | label_ref | func_ref) placement? port?

// Sigil-prefixed names: "@" node, "&" local label, "$" function / subgraph.
node_ref: "@" IDENT
label_ref: "&" IDENT
func_ref: "$" IDENT

// Optional qualifiers that may follow a reference name.
placement: "|" IDENT
port: ":" PORT_SPEC

// A port is named either symbolically (identifier) or numerically (hex/decimal).
PORT_SPEC: IDENT | HEX_LIT | DEC_LIT

// === Arguments ===
// An argument is a value, a qualified ref, or a named key=value pair.
// Named args are syntactically valid on any instruction.
// Semantic validation (which ops accept named args) is deferred to the assembler.
// "argument" and "positional_arg" are inlined ("?"), so the tree contains the
// named_arg, literal or qualified_ref node directly.

?argument: named_arg | positional_arg
named_arg: IDENT "=" positional_arg
?positional_arg: value | qualified_ref

// === Values (literals) ===
// Each alternative is aliased, so a literal shows up in the tree as a node
// named after its kind (hex_literal, dec_literal, ...) holding the token.

?value: HEX_LIT         -> hex_literal
      | DEC_LIT         -> dec_literal
      | CHAR_LIT        -> char_literal
      | STRING_LIT      -> string_literal
      | RAW_STRING_LIT  -> raw_string_literal
      | BYTE_STRING_LIT -> byte_string_literal

// One or more comma-separated literal values.
value_list: value ("," value)*

// === Macros ===
//   #name [arg ...] — expanded in a later pass, not during parsing.
// Arguments are optional and are separated by whitespace, not commas; each
// one is a literal value or a qualified reference.

macro_call: "#" IDENT (value | qualified_ref)*

// === Opcodes ===
// Exhaustive keyword terminal. Priority 2 ensures opcodes win over IDENT
// at the lexer level. Semantic validation (monadic/dyadic arity, valid
// argument combinations) is deferred to the assembler.
// NOTE(review): the keywords carry no trailing word boundary, so input such
// as `add1` may lex as OPCODE `add` followed by `1` — confirm this is intended.

opcode: OPCODE

OPCODE.2: "add" | "sub" | "inc" | "dec"
        | "shiftl" | "shiftr" | "ashiftr"
        | "and" | "or" | "xor" | "not"
        | "eq" | "lt" | "lte" | "gt" | "gte"
        | "breq" | "brgt" | "brge" | "brof" | "brty"
        | "sweq" | "swgt" | "swge" | "swof" | "swty"
        | "gate" | "sel" | "merge"
        | "pass" | "const" | "free"
        | "ior" | "iow" | "iorw"
        | "load_inst" | "route_set"

// === Flow operators ===
// Priority 3 to win over any partial match of | or < or >
// (for example the "|" that introduces a placement qualifier).

FLOW_IN.3: "<|"
FLOW_OUT.3: "|>"

// === Terminals ===

// Numeric literals: "0x"-prefixed hexadecimal (either digit case) and
// unsigned decimal. There is no sign or digit-separator syntax.
HEX_LIT: /0x[0-9a-fA-F]+/
DEC_LIT: /[0-9]+/

// Character literals: exactly one character or one escape sequence between
// single quotes.
// Supported escapes: \n \t \r \0 \\ \' \xNN (NN = exactly two hex digits)
CHAR_LIT: /'([^'\\]|\\[ntr0\\']|\\x[0-9a-fA-F]{2})'/

// String literals — Rust-style semantics.
//   "..."   regular string, escape sequences processed by assembler
//   r"..."  raw string, no escape processing (so it cannot contain a '"')
//   b"..."  byte string, semantic difference only (raw byte values)
// Multi-line strings are permitted: /s flag makes . match \n, which lets a
// backslash escape a newline; the negated character classes already match
// newlines on their own.
STRING_LIT: /\"([^\"\\]|\\.)*\"/s
RAW_STRING_LIT: /r\"[^\"]*\"/s
BYTE_STRING_LIT: /b\"([^\"\\]|\\.)*\"/s

// Identifier: an ASCII letter or underscore, then letters, digits or underscores.
IDENT: /[a-zA-Z_][a-zA-Z0-9_]*/

// === Whitespace & Comments ===
// ; starts a comment to end of line (traditional asm behaviour).
// Newlines are significant as statement separators.
// _NL is one or more adjacent newlines and/or comments; the leading
// underscore keeps it out of the parse tree.

COMMENT: /;[^\n]*/
_NL: (NEWLINE | COMMENT)+

// Inline whitespace (spaces/tabs) and comments are skipped wherever they
// occur; newlines are NOT ignored because they separate statements.
%import common.NEWLINE
%import common.WS_INLINE
%ignore WS_INLINE
%ignore COMMENT