// Lark EBNF grammar for a dataflow assembly graph language (or1-dfasm).
// Original listing: 143 lines, 4.4 kB.
// Dataflow Graph Assembly Lark EBNF Grammar v0.2
// Parser: Earley (required for ambiguity-free resolution of location_dir vs weak_edge)

// A program is zero or more statements. Consecutive statements must be
// separated by at least one _NL token (newline and/or comment run); leading
// and trailing _NL runs are allowed. _NL tokens are filtered from the tree,
// so the children of `start` are just the statements.
start: _NL* (statement (_NL+ statement)* _NL*)?

// The leading '?' inlines the single matched alternative, so no wrapper
// "statement" node appears in the parse tree.
?statement: func_def
          | inst_def
          | strong_edge
          | weak_edge
          | plain_edge
          | data_def
          | location_dir

// --- Function / subgraph definition ---
//   $name |> { body }
// The body uses the same separator rule as the top level: statements inside
// the braces must be separated by at least one _NL.
func_def: func_ref FLOW_OUT "{" _NL* (statement (_NL+ statement)* _NL*)? "}"

// --- Instruction definition (named node) ---
//   &label <| opcode [, arg ...]
// Every argument, including the first, is introduced by a comma.
inst_def: qualified_ref FLOW_IN opcode ("," argument)*

// --- Strong inline edge (internal route, anonymous node) ---
//   opcode input [, input ...] |> output [, output ...]
// At least one input argument is required.
strong_edge: opcode argument ("," argument)* FLOW_OUT ref_list

// --- Weak inline edge (token output, anonymous node) ---
//   output [, output ...] opcode <| input [, input ...]
weak_edge: ref_list opcode FLOW_IN argument ("," argument)*

// --- Plain edge (wiring between named nodes) ---
//   source |> dest [, dest ...]
plain_edge: qualified_ref FLOW_OUT ref_list

// --- Data / initialisation ---
//   ref = value [, value ...]  |  ref = #macro args
data_def: qualified_ref "=" (macro_call | value_list)

// --- Location directive (bare qualified ref, no operator) ---
// Sets location context for subsequent definitions.
location_dir: qualified_ref

// === Shared productions ===

// One or more comma-separated references.
ref_list: qualified_ref ("," qualified_ref)*

// === References ===
// Qualifier chain: max one placement (|ident) and one port (:spec),
// in that order.
//   @name    node reference
//   &name    local label reference
//   $name    function / subgraph reference
// Chaining: @sum|pe0:L (placement + port)

qualified_ref: (node_ref | label_ref | func_ref) placement? port?

// Sigil-prefixed reference kinds.
node_ref:  "@" IDENT
label_ref: "&" IDENT
func_ref:  "$" IDENT

// Optional qualifiers that may follow a reference.
placement: "|" IDENT
port:      ":" PORT_SPEC

// A port is named by an identifier or a hex/decimal number.
PORT_SPEC: IDENT | HEX_LIT | DEC_LIT

// === Arguments ===
// An argument is a value, a qualified ref, or a named key=value pair.
// Named args are syntactically valid on any instruction.
// Semantic validation (which ops accept named args) is deferred to the assembler.

?argument: named_arg | positional_arg
named_arg: IDENT "=" positional_arg
?positional_arg: value | qualified_ref

// === Values (literals) ===
// Each alternative is aliased, so the tree node names the literal kind.

?value: HEX_LIT         -> hex_literal
      | DEC_LIT         -> dec_literal
      | CHAR_LIT        -> char_literal
      | STRING_LIT      -> string_literal
      | RAW_STRING_LIT  -> raw_string_literal
      | BYTE_STRING_LIT -> byte_string_literal

value_list: value ("," value)*

// === Macros ===
// #name [arg ...] — expanded in a later pass, not during parsing.
// Arguments are whitespace-separated (no commas) and may be omitted entirely.

macro_call: "#" IDENT (value | qualified_ref)*

// === Opcodes ===
// Exhaustive keyword terminal. Priority 2 ensures opcodes win over IDENT
// at the lexer level. Semantic validation (monadic/dyadic arity, valid
// argument combinations) is deferred to the assembler.
//
// The trailing negative lookahead requires the keyword to end at a word
// boundary, so an opcode is never matched as a prefix of a longer word
// (e.g. "add" inside "addr", or "inc" inside "inc1").

opcode: OPCODE

OPCODE.2: ("add" | "sub" | "inc" | "dec"
        | "shiftl" | "shiftr" | "ashiftr"
        | "and" | "or" | "xor" | "not"
        | "eq" | "lt" | "lte" | "gt" | "gte"
        | "breq" | "brgt" | "brge" | "brof" | "brty"
        | "sweq" | "swgt" | "swge" | "swof" | "swty"
        | "gate" | "sel" | "merge"
        | "pass" | "const" | "free"
        | "ior" | "iow" | "iorw"
        | "load_inst" | "route_set") /(?![a-zA-Z0-9_])/

// === Flow operators ===
// Priority 3 to win over any partial match of | or < or >

FLOW_IN.3:  "<|"
FLOW_OUT.3: "|>"

// === Terminals ===

HEX_LIT: /0x[0-9a-fA-F]+/
DEC_LIT: /[0-9]+/

// Character literals: single char or escape sequence.
// Supported escapes: \n \t \r \0 \\ \' \xNN
CHAR_LIT: /'([^'\\]|\\[ntr0\\']|\\x[0-9a-fA-F]{2})'/

// String literals: Rust-style semantics.
//   "..."    regular string, escape sequences processed by assembler
//   r"..."   raw string, no escape processing
//   b"..."   byte string, semantic difference only (raw byte values)
// Multi-line strings are permitted: /s flag makes . match \n.
STRING_LIT: /\"([^\"\\]|\\.)*\"/s
RAW_STRING_LIT: /r\"[^\"]*\"/s
BYTE_STRING_LIT: /b\"([^\"\\]|\\.)*\"/s

IDENT: /[a-zA-Z_][a-zA-Z0-9_]*/

// === Whitespace & Comments ===
// ; starts a comment to end of line (traditional asm behaviour).
// Newlines are significant as statement separators.
// COMMENT is both part of _NL and %ignored: a comment may be consumed as
// part of a separator run or skipped as ignorable text.

COMMENT: /;[^\n]*/
_NL: (NEWLINE | COMMENT)+

%import common.NEWLINE
%import common.WS_INLINE
%ignore WS_INLINE
%ignore COMMENT