Implementation of the UM-32 "Universal Machine" as described by the Cult of the Bound Variable

fix: clear all clippy pedantic warnings

Signed-off-by: tjh <x@tjh.dev>

tjh.dev b1339d4b 014e9ff7

verified
+263 -156
+1 -1
Cargo.lock
··· 96 96 97 97 [[package]] 98 98 name = "um" 99 - version = "0.2.0" 99 + version = "0.2.1" 100 100 dependencies = [ 101 101 "logos", 102 102 "smallvec",
+1 -2
Cargo.toml
··· 1 1 [package] 2 2 name = "um" 3 - version = "0.2.0" 3 + version = "0.2.1" 4 4 edition = "2021" 5 5 authors = ["tjh <14987462+thomhayward@users.noreply.github.com>"] 6 6 license = "GPL-3.0-only" ··· 18 18 [profile.release] 19 19 lto = "fat" 20 20 codegen-units = 1 21 - 22 21 23 22 [[bin]] 24 23 name = "uasm"
+36 -24
src/asm.rs
··· 12 12 // You should have received a copy of the GNU General Public License along with 13 13 // this program. If not, see <https://www.gnu.org/licenses/>. 14 14 // 15 + 15 16 mod lexer; 16 17 mod parse; 17 18 18 - use crate::Register; 19 + use crate::reg::Register; 19 20 use lexer::Token; 20 21 use parse::{Instruction, Node, NodeType, PragmaType}; 21 22 use std::collections::HashMap; ··· 26 27 Data, 27 28 } 28 29 30 + /// Assemble a Universal Machine program. 31 + /// 32 + /// # Panics 33 + /// 34 + /// Panics if `source` cannot be parsed. 35 + /// 36 + #[must_use] 37 + #[allow(clippy::too_many_lines)] 29 38 pub fn assemble<'s>(source: &'s str) -> Vec<u32> { 30 39 let parsed = parse::parse("", source).unwrap(); 31 40 32 41 let mut sections: HashMap<Section, Vec<&Node<'s>>> = HashMap::new(); 33 42 let mut offsets: HashMap<Section, usize> = HashMap::new(); 34 43 let mut label_locations: HashMap<&'s str, (Section, usize)> = HashMap::new(); 35 - for node in parsed.nodes().iter() { 44 + for node in parsed.nodes() { 36 45 match node.entity { 37 46 NodeType::Pragma(_) => { 38 47 let loc = *offsets ··· 43 52 sections 44 53 .entry(Section::Data) 45 54 .and_modify(|section| section.push(node)) 46 - .or_insert(vec![node]); 55 + .or_insert_with(|| vec![node]); 47 56 48 57 for label in &node.labels { 49 58 label_locations.insert(label, (Section::Data, loc)); ··· 58 67 sections 59 68 .entry(Section::Text) 60 69 .and_modify(|section| section.push(node)) 61 - .or_insert(vec![node]); 70 + .or_insert_with(|| vec![node]); 62 71 63 72 for label in &node.labels { 64 73 label_locations.insert(label, (Section::Text, loc)); 65 74 } 66 75 } 67 - _ => {} 76 + NodeType::Comment(_) => {} 68 77 } 69 78 } 70 79 ··· 72 81 let data_offset = text.len(); 73 82 74 83 let mut program = vec![]; 75 - for node in text.into_iter() { 84 + for node in text { 76 85 let NodeType::Instruction(instruction) = &node.entity else { 77 86 panic!("invalid node in .text section"); 78 87 }; 79 88 80 - let encoded = match instruction { 89 + let encoded = match *instruction { 81 90 Instruction::ConditionalMove { 82 91 destination, 83 92 source, ··· 124 133 } 125 134 Instruction::Halt => encode_standard( 126 135 0x07, 127 - &Default::default(), 128 - &Default::default(), 129 - &Default::default(), 136 + Register::default(), 137 + Register::default(), 138 + Register::default(), 130 139 ), 131 140 Instruction::Alloc { 132 141 destination, 133 142 length, 134 - } => encode_standard(0x08, &Register::default(), destination, length), 143 + } => encode_standard(0x08, Register::default(), destination, length), 135 144 Instruction::Free { block } => { 136 - encode_standard(0x09, &Register::default(), &Register::default(), block) 145 + encode_standard(0x09, Register::default(), Register::default(), block) 137 146 } 138 147 Instruction::Out { source } => { 139 - encode_standard(0x0a, &Default::default(), &Default::default(), source) 148 + encode_standard(0x0a, Register::default(), Register::default(), source) 140 149 } 141 150 Instruction::In { destination } => { 142 - encode_standard(0x0b, &Default::default(), &Default::default(), destination) 151 + encode_standard(0x0b, Register::default(), Register::default(), destination) 143 152 } 144 153 Instruction::Jmp { location } => { 145 154 let parse::Location { block, offset } = location; 146 - encode_standard(0x0c, &Register::default(), block, offset) 155 + encode_standard(0x0c, Register::default(), block, offset) 147 156 } 148 157 Instruction::Address { 149 158 destination, 150 - reference, 159 + ref reference, 151 160 } => { 152 161 // lookup reference 153 162 let Some((section, offset)) = label_locations.get(reference.label) else { ··· 159 168 Section::Data => data_offset + *offset, 160 169 }; 161 170 162 - 0xd0000000 | destination.encode_a_ortho() | encode_literal(value as u32) 171 + 0xd000_0000 172 + | destination.encode_a_ortho() 173 + | encode_literal(u32::try_from(value).unwrap()) 163 174 } 164 175 Instruction::LiteralMove { 165 176 destination, 166 177 literal, 167 - } => 0xd0000000 | destination.encode_a_ortho() | encode_literal(*literal), 178 + } => 0xd000_0000 | destination.encode_a_ortho() | encode_literal(literal), 168 179 }; 169 180 170 181 program.push(encoded); 171 182 } 172 183 173 184 if let Some(data) = sections.remove(&Section::Data) { 174 - for node in data.into_iter() { 185 + for node in data { 175 186 let NodeType::Pragma(pragma) = &node.entity else { 176 187 panic!("invalid node in .data section. {node:?}"); 177 188 }; 178 189 179 190 let encoded = match &pragma.payload { 180 191 PragmaType::WideString { value } => { 181 - for byte in value.as_bytes() { 182 - program.push(*byte as u32); 192 + for &byte in value.as_bytes() { 193 + program.push(u32::from(byte)); 183 194 } 184 195 Some(0) // terminating byte. 185 196 } ··· 196 207 } 197 208 198 209 fn encode_literal(value: u32) -> u32 { 199 - const LITERAL_MAX: u32 = 0x1ffffff; 210 + const LITERAL_MAX: u32 = 0x1ff_ffff; 200 211 assert!(value <= LITERAL_MAX, "literal value exceeds available bits. value: {value} (0x{value:x}), max: {LITERAL_MAX} (0x{LITERAL_MAX:x})"); 201 212 value 202 213 } 203 214 204 - fn encode_standard(op: u32, a: &Register, b: &Register, c: &Register) -> u32 { 215 + const fn encode_standard(op: u32, a: Register, b: Register, c: Register) -> u32 { 205 216 (op << 28) | a.encode_a() | b.encode_b() | c.encode_c() 206 217 } 207 218 208 219 #[cfg(test)] 209 220 mod tests { 210 221 use super::*; 211 - use crate::{Operation, Register::*}; 222 + use crate::ops::Operation; 223 + use crate::reg::Register::*; 212 224 213 225 #[test] 214 226 fn wide_str() {
+6 -5
src/asm/lexer.rs
··· 12 12 // You should have received a copy of the GNU General Public License along with 13 13 // this program. If not, see <https://www.gnu.org/licenses/>. 14 14 // 15 - use crate::Register; 15 + 16 + use crate::reg::Register; 16 17 use logos::{Lexer, Logos}; 17 18 18 19 #[derive(Clone, Debug, Default, PartialEq, Eq)] ··· 20 21 pub line: usize, 21 22 } 22 23 23 - #[derive(Logos, Debug, PartialEq)] 24 + #[derive(Logos, Debug, PartialEq, Eq)] 24 25 #[logos(skip r"[ \t\f,]+", extras = Extras)] 25 26 pub enum Token<'source> { 26 27 #[token("\n", lex_newline)] ··· 70 71 lexer.extras.line += 1; 71 72 } 72 73 73 - fn lex_label<'source>(lex: &mut Lexer<'source, Token<'source>>) -> &'source str { 74 + fn lex_label<'source>(lex: &Lexer<'source, Token<'source>>) -> &'source str { 74 75 let slice = lex.slice(); 75 76 &slice[..slice.len() - 1] 76 77 } 77 78 78 - fn lex_number<'source>(lex: &mut Lexer<'source, Token<'source>>) -> u32 { 79 + fn lex_number<'source>(lex: &Lexer<'source, Token<'source>>) -> u32 { 79 80 let slice = &lex.slice(); 80 81 if slice.starts_with("0x") { 81 82 u32::from_str_radix(slice.trim_start_matches("0x"), 16).unwrap() ··· 118 119 remainder 119 120 } 120 121 121 - fn lex_register<'source>(lex: &mut Lexer<'source, Token<'source>>) -> Register { 122 + fn lex_register<'source>(lex: &Lexer<'source, Token<'source>>) -> Register { 122 123 let slice = lex.slice(); 123 124 let index = slice[1..] 124 125 .parse()
+17 -15
src/asm/parse.rs
··· 12 12 // You should have received a copy of the GNU General Public License along with 13 13 // this program. If not, see <https://www.gnu.org/licenses/>. 14 14 // 15 + 15 16 use super::Token; 16 - use crate::Register; 17 + use crate::reg::Register; 17 18 use logos::{Logos, Source}; 18 19 use std::{borrow::Cow, collections::HashMap, iter::Peekable, ops::Range, str::CharIndices}; 19 20 20 - pub fn parse(_unit: impl std::fmt::Display, source: &str) -> Result<ParsedProgram, Error> { 21 + pub fn parse(_unit: impl std::fmt::Display, source: &str) -> Result<ParsedProgram<'_>, Error> { 21 22 Parser::new(source).parse() 22 23 } 23 24 ··· 94 95 Ok(token) => { 95 96 spanned.push((token, lexer.span())); 96 97 } 97 - Err(error) => Err(Error::new(format!("lex: {error:?}"), &lexer.span()))?, 98 + Err(error) => Err(Error::new(&format!("lex: {error:?}"), &lexer.span()))?, 98 99 } 99 100 } 100 101 ··· 121 122 tokens.next(); 122 123 continue; 123 124 } 124 - _ => Err(Error::new(format!("unexpected token {token:?}"), span))?, 125 + _ => Err(Error::new(&format!("unexpected token {token:?}"), span))?, 125 126 }; 126 127 127 128 nodes.push(node); ··· 153 154 // same identifier. 154 155 if label_span != &span { 155 156 return Err(Error::new( 156 - format!("duplicate label '{label_ident}', original label span: {label_span:?}"), 157 + &format!("duplicate label '{label_ident}', original label span: {label_span:?}"), 157 158 &span, 158 159 )); 159 160 } ··· 206 207 pub struct Error(pub String, pub Range<usize>); 207 208 208 209 impl Error { 209 - fn new(message: impl ToString, span: &Range<usize>) -> Self { 210 + fn new(message: &(impl ToString + ?Sized), span: &Range<usize>) -> Self { 210 211 Self(message.to_string(), span.clone()) 211 212 } 212 213 ··· 223 224 224 225 impl std::error::Error for Error {} 225 226 226 - #[derive(Debug, Default)] 227 + #[derive(Clone, Copy, Debug, Default, PartialEq, Eq, PartialOrd, Ord)] 227 228 pub struct Location { 228 229 pub block: Register, 229 230 pub offset: Register, ··· 279 280 I: Iterator<Item = (Token<'s>, Range<usize>)>, 280 281 { 281 282 let relocatable = true; 282 - let token = tokens.next().ok_or(Error::eof())?; 283 + let token = tokens.next().ok_or_else(Error::eof)?; 283 284 match token { 284 285 (Token::Pragma("u32"), start_span) => { 285 286 let (value, end_span) = consume_number(tokens)?; ··· 302 303 )) 303 304 } 304 305 (Token::Pragma(command), span) => Err(Error::new( 305 - format!("unknown pragma command {command}"), 306 + &format!("unknown pragma command {command}"), 306 307 &span, 307 308 ))?, 308 309 (_, span) => Err(Error::new("unexpected token", &span))?, ··· 404 405 } 405 406 406 407 impl<'s> Instruction<'s> { 408 + #[allow(clippy::too_many_lines)] 407 409 pub fn consume<I>(tokens: &mut Peekable<I>) -> Result<(Self, Range<usize>), Error> 408 410 where 409 411 I: Iterator<Item = (Token<'s>, Range<usize>)>, ··· 653 655 match tokens.next() { 654 656 Some((Token::Register(r), span)) => Ok((r, span)), 655 657 Some((token, span)) => Err(Error::new( 656 - format!("expected a register, found: {token:?}"), 658 + &format!("expected a register, found: {token:?}"), 657 659 &span, 658 660 )), 659 661 None => Err(Error::eof()), ··· 667 669 match tokens.next() { 668 670 Some((Token::Ident(ident), span)) => Ok((ident, span)), 669 671 Some((token, span)) => Err(Error::new( 670 - format!("expected an identifier, found: {token:?}"), 672 + &format!("expected an identifier, found: {token:?}"), 671 673 &span, 672 674 )), 673 675 None => Err(Error::eof()), ··· 681 683 match tokens.next() { 682 684 Some((Token::Number(value), span)) => Ok((value, span)), 683 685 Some((token, span)) => Err(Error::new( 684 - format!("expected a number literal, found: {token:?}"), 686 + &format!("expected a number literal, found: {token:?}"), 685 687 &span, 686 688 )), 687 689 None => Err(Error::eof()), ··· 698 700 Ok((unescaped, span)) 699 701 } 700 702 Some((token, span)) => Err(Error::new( 701 - format!("expected a number literal, found: {token:?}"), 703 + &format!("expected a number literal, found: {token:?}"), 702 704 &span, 703 705 )), 704 706 None => Err(Error::eof()), 705 707 } 706 708 } 707 709 708 - fn merge_spans(start: &Range<usize>, end: &Range<usize>) -> Range<usize> { 710 + const fn merge_spans(start: &Range<usize>, end: &Range<usize>) -> Range<usize> { 709 711 start.start..end.end 710 712 } 711 713 ··· 713 715 #[allow(unused)] 714 716 pub struct InvalidCharacterEscape(pub char, pub usize); 715 717 716 - pub fn unescape_str(s: &str) -> Result<Cow<str>, InvalidCharacterEscape> { 718 + pub fn unescape_str(s: &str) -> Result<Cow<'_, str>, InvalidCharacterEscape> { 717 719 fn escape_inner(c: &str, i: &mut CharIndices<'_>) -> Result<String, InvalidCharacterEscape> { 718 720 let mut buffer = c.to_owned(); 719 721 let mut in_escape = true;
+17 -26
src/bin/uasm.rs
··· 12 12 // You should have received a copy of the GNU General Public License along with 13 13 // this program. If not, see <https://www.gnu.org/licenses/>. 14 14 // 15 - use std::path::{Path, PathBuf}; 15 + use std::{ 16 + ffi::OsStr, 17 + io, 18 + path::{Path, PathBuf}, 19 + }; 16 20 17 - fn main() { 21 + fn main() -> io::Result<()> { 18 22 let mut output = PathBuf::from("./a.um"); 19 23 20 24 let mut program = Vec::new(); ··· 26 30 } 27 31 _ => { 28 32 let path = Path::new(&arg); 29 - program.extend_from_slice(&match load_program(path) { 30 - Ok(p) => p, 31 - Err(error) => { 32 - eprintln!("{error}"); 33 - std::process::exit(1); 34 - } 35 - }); 33 + program.extend_from_slice(&load_program(path)?); 36 34 } 37 35 } 38 36 } 39 37 40 38 // Convert the program to bytes. 41 - let bytes: Vec<_> = program 42 - .into_iter() 43 - .flat_map(|word| word.to_be_bytes()) 44 - .collect(); 45 - 46 - std::fs::write(&output, bytes).unwrap(); 39 + let bytes: Vec<_> = program.into_iter().flat_map(u32::to_be_bytes).collect(); 40 + std::fs::write(&output, bytes) 47 41 } 48 42 49 - fn load_program(path: &Path) -> std::io::Result<Vec<u32>> { 50 - match path.extension().map(|ext| ext.as_encoded_bytes()) { 51 - Some(b"uasm") | Some(b"asm") => { 52 - let source = std::fs::read_to_string(path)?; 53 - let program = um::asm::assemble(&source); 54 - Ok(program) 55 - } 56 - _ => { 57 - let program = std::fs::read(path)?; 58 - Ok(um::conv::bytes_to_program(&program).unwrap()) 59 - } 43 + fn load_program(path: &Path) -> io::Result<Vec<u32>> { 44 + if let Some(b"uasm" | b"asm") = path.extension().map(OsStr::as_encoded_bytes) { 45 + let source = std::fs::read_to_string(path)?; 46 + let program = um::asm::assemble(&source); 47 + Ok(program) 48 + } else { 49 + let program = std::fs::read(path)?; 50 + Ok(um::conv::bytes_to_program(&program).unwrap()) 60 51 } 61 52 }
+9 -1
src/conv.rs
··· 12 12 // You should have received a copy of the GNU General Public License along with 13 13 // this program. If not, see <https://www.gnu.org/licenses/>. 14 14 // 15 + 15 16 const WORD_LEN: usize = std::mem::size_of::<u32>(); 16 17 17 18 #[derive(Debug)] ··· 19 20 20 21 /// Converts a byte slice to a program. 21 22 /// 22 - /// Returns `None` if the byte slice is not a multiple of 4 bytes in length. 23 + /// # Errors 24 + /// 25 + /// Returns `Err(InvalidProgram)` if the program is not a whole number of 32-bit 26 + /// instructions. 27 + /// 28 + // 29 + // This does not panic. 30 + #[allow(clippy::missing_panics_doc)] 23 31 pub fn bytes_to_program(bytes: &[u8]) -> Result<Vec<u32>, InvalidProgram> { 24 32 if bytes.len().rem_euclid(WORD_LEN) != 0 { 25 33 return Err(InvalidProgram);
+1 -1
src/lib.rs
··· 15 15 16 16 #[cfg(feature = "asm")] 17 17 pub mod asm; 18 + 18 19 pub mod conv; 19 20 pub mod ops; 20 21 pub mod reg; 21 22 22 23 mod universal_machine; 23 - 24 24 pub use universal_machine::Um;
+10 -4
src/main.rs
··· 12 12 // You should have received a copy of the GNU General Public License along with 13 13 // this program. If not, see <https://www.gnu.org/licenses/>. 14 14 // 15 + 16 + #[cfg(feature = "asm")] 17 + mod asm; 18 + 19 + mod conv; 15 20 mod ops; 16 21 mod reg; 17 22 mod universal_machine; 18 23 19 24 use std::{path::Path, time::Instant}; 25 + 20 26 use universal_machine::Um; 21 27 22 28 fn main() { ··· 57 63 // Unfortunately this leads some wierd code generation fuckery which 58 64 // makes the version without the 'asm' feature ~1-2 seconds slower 59 65 // when running the sandmark program. 60 - Some(b"uasm") | Some(b"asm") => { 66 + Some(b"uasm" | b"asm") => { 61 67 let source = std::fs::read_to_string(path)?; 62 - Ok(um::asm::assemble(&source)) 68 + Ok(asm::assemble(&source)) 63 69 } 64 70 _ => { 65 71 let program = std::fs::read(path)?; 66 - Ok(um::conv::bytes_to_program(&program).unwrap()) 72 + Ok(conv::bytes_to_program(&program).unwrap()) 67 73 } 68 74 } 69 75 } ··· 71 77 #[cfg(not(feature = "asm"))] 72 78 fn load_program(path: &Path) -> std::io::Result<Vec<u32>> { 73 79 let program = std::fs::read(path)?; 74 - Ok(um::conv::bytes_to_program(&program).unwrap()) 80 + Ok(conv::bytes_to_program(&program).unwrap()) 75 81 }
+19 -16
src/ops.rs
··· 12 12 // You should have received a copy of the GNU General Public License along with 13 13 // this program. If not, see <https://www.gnu.org/licenses/>. 14 14 // 15 + 15 16 use crate::reg::Register; 16 17 17 18 #[derive(Clone, Copy, Debug, PartialEq, Eq)] ··· 157 158 let a = Register::from_u8(((value >> 6) & 0x07) as u8); 158 159 let b = Register::from_u8(((value >> 3) & 0x07) as u8); 159 160 let c = Register::from_u8((value & 0x07) as u8); 160 - match value & 0xf0000000 { 161 - 0x00000000 => Self::ConditionalMove { a, b, c }, 162 - 0x10000000 => Self::ArrayIndex { a, b, c }, 163 - 0x20000000 => Self::ArrayAmendment { a, b, c }, 164 - 0x30000000 => Self::Addition { a, b, c }, 165 - 0x40000000 => Self::Multiplication { a, b, c }, 166 - 0x50000000 => Self::Division { a, b, c }, 167 - 0x60000000 => Self::NotAnd { a, b, c }, 168 - 0x70000000 => Self::Halt, 169 - 0x80000000 => Self::Allocation { b, c }, 170 - 0x90000000 => Self::Abandonment { c }, 171 - 0xa0000000 => Self::Output { c }, 172 - 0xb0000000 => Self::Input { c }, 173 - 0xc0000000 => Self::LoadProgram { b, c }, 174 - 0xd0000000 => { 161 + match value & 0xf000_0000 { 162 + 0x0000_0000 => Self::ConditionalMove { a, b, c }, 163 + 0x1000_0000 => Self::ArrayIndex { a, b, c }, 164 + 0x2000_0000 => Self::ArrayAmendment { a, b, c }, 165 + 0x3000_0000 => Self::Addition { a, b, c }, 166 + 0x4000_0000 => Self::Multiplication { a, b, c }, 167 + 0x5000_0000 => Self::Division { a, b, c }, 168 + 0x6000_0000 => Self::NotAnd { a, b, c }, 169 + 0x7000_0000 => Self::Halt, 170 + 0x8000_0000 => Self::Allocation { b, c }, 171 + 0x9000_0000 => Self::Abandonment { c }, 172 + 0xa000_0000 => Self::Output { c }, 173 + 0xb000_0000 => Self::Input { c }, 174 + 0xc000_0000 => Self::LoadProgram { b, c }, 175 + 0xd000_0000 => { 175 176 let a = Register::from_u8(((value >> 25) & 0x07) as u8); 176 - let value = value & 0x01ffffff; 177 + let value = value & 0x01ff_ffff; 177 178 Self::Orthography { a, value } 178 179 } 179 180 _ => Self::IllegalInstruction, ··· 181 182 } 182 183 } 183 184 185 + /// Decode a Universal Machine program into a [`Vec`] of [`Operation`]s. 186 + #[must_use] 184 187 pub fn decode(ops: &[u32]) -> Vec<Operation> { 185 188 ops.iter() 186 189 .map(|&encoded| Operation::from(encoded))
+37 -24
src/reg.rs
··· 12 12 // You should have received a copy of the GNU General Public License along with 13 13 // this program. If not, see <https://www.gnu.org/licenses/>. 14 14 // 15 + 15 16 /// A reference to a register of the UM-32. 16 17 #[derive(Clone, Copy, Debug, Default, PartialEq, Eq, PartialOrd, Ord)] 17 18 pub enum Register { ··· 33 34 } 34 35 35 36 impl Register { 36 - /// Encodes the register as the 'a' parameter of an encoded 37 + /// Create a [`Register`] from a register index. 38 + /// 39 + /// # Panics 40 + /// 41 + /// Panics if `index` is not in `0..8`. 42 + /// 43 + #[must_use] 44 + pub const fn from_u8(index: u8) -> Self { 45 + match index { 46 + 0 => Self::R0, 47 + 1 => Self::R1, 48 + 2 => Self::R2, 49 + 3 => Self::R3, 50 + 4 => Self::R4, 51 + 5 => Self::R5, 52 + 6 => Self::R6, 53 + 7 => Self::R7, 54 + _ => panic!("register index must be in range 0..8"), 55 + } 56 + } 57 + } 58 + 59 + #[cfg(feature = "asm")] 60 + impl Register { 61 + /// Encode the register as the 'a' parameter of an encoded 37 62 /// instruction (bits 6..=8). 38 - pub fn encode_a(self) -> u32 { 63 + #[must_use] 64 + pub const fn encode_a(self) -> u32 { 39 65 ((self as u32) & 0x7) << 6 40 66 } 41 67 42 - /// Encodes the register as the 'b' parameter of an encoded 68 + /// Encode the register as the 'b' parameter of an encoded 43 69 /// instruction (bits 3..=5). 44 - pub fn encode_b(self) -> u32 { 70 + #[must_use] 71 + pub const fn encode_b(self) -> u32 { 45 72 ((self as u32) & 0x7) << 3 46 73 } 47 74 48 - /// Encodes the register as the 'c' parameter of an encoded 75 + /// Encode the register as the 'c' parameter of an encoded 49 76 /// instruction (bits 0..=2). 50 - pub fn encode_c(self) -> u32 { 77 + #[must_use] 78 + pub const fn encode_c(self) -> u32 { 51 79 (self as u32) & 0x7 52 80 } 53 81 54 - /// Encodes the register as the 'a' parameter of an `Orthography` 82 + /// Encode the register as the 'a' parameter of an `Orthography` 55 83 /// operation. 56 84 /// 57 85 /// This is *only* valid for `Orthography` operations. 58 - pub fn encode_a_ortho(self) -> u32 { 86 + #[must_use] 87 + pub const fn encode_a_ortho(self) -> u32 { 59 88 ((self as u32) & 0x7) << 25 60 89 } 61 - 62 - pub fn from_u8(index: u8) -> Self { 63 - match index { 64 - 0 => Register::R0, 65 - 1 => Register::R1, 66 - 2 => Register::R2, 67 - 3 => Register::R3, 68 - 4 => Register::R4, 69 - 5 => Register::R5, 70 - 6 => Register::R6, 71 - 7 => Register::R7, 72 - _ => unreachable!(), 73 - } 74 - } 75 90 } 76 91 77 92 /// A set of registers. ··· 80 95 81 96 impl std::ops::Index<Register> for Page { 82 97 type Output = u32; 83 - #[inline(always)] 84 98 fn index(&self, index: Register) -> &Self::Output { 85 99 &self.0[index as usize] 86 100 } 87 101 } 88 102 89 103 impl std::ops::IndexMut<Register> for Page { 90 - #[inline(always)] 91 104 fn index_mut(&mut self, index: Register) -> &mut Self::Output { 92 105 &mut self.0[index as usize] 93 106 }
+109 -37
src/universal_machine.rs
··· 1 + // Copyright (C) 2025 Thom Hayward. 2 + // 3 + // This program is free software: you can redistribute it and/or modify it under 4 + // the terms of the GNU General Public License as published by the Free Software 5 + // Foundation, version 3. 6 + // 7 + // This program is distributed in the hope that it will be useful, but WITHOUT 8 + // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS 9 + // FOR A PARTICULAR PURPOSE. See the GNU General Public License for more 10 + // details. 11 + // 12 + // You should have received a copy of the GNU General Public License along with 13 + // this program. If not, see <https://www.gnu.org/licenses/>. 14 + // 15 + 1 16 use std::io::Read; 2 17 use std::io::Write; 3 18 ··· 6 21 use crate::ops::Operation; 7 22 use crate::reg::{Page, Register}; 8 23 9 - pub(crate) const SMALLVEC_SIZE: usize = 24; 24 + pub const SMALLVEC_SIZE: usize = 24; 10 25 11 26 /// Lossless conversion to `usize`. 12 27 /// 13 28 /// This should only be implemented on types which can be losslessly 14 29 /// cast to a `usize`. 15 - pub(crate) trait IntoIndex: Sized + Copy { 30 + trait IntoIndex: Sized + Copy { 16 31 fn into_index(self) -> usize; 17 32 } 18 33 ··· 36 51 #[derive(Default)] 37 52 pub struct Um<'a> { 38 53 pub program_counter: u32, 39 - pub(crate) registers: Page, 54 + pub registers: Page, 40 55 /// Program memory, modelled as a `Vec` of `SmallVec`. 41 56 /// 42 57 /// Memory allocations greater than `SMALLVEC_SIZE` will incur a memory 43 58 /// indirection penalty for every memory access within that block. 44 - pub(crate) memory: Vec<SmallVec<[u32; SMALLVEC_SIZE]>>, 45 - pub(crate) free_blocks: Vec<u32>, 59 + pub memory: Vec<SmallVec<[u32; SMALLVEC_SIZE]>>, 60 + pub free_blocks: Vec<u32>, 46 61 /// Partially decoded operations cache. 47 - pub(crate) ops: Vec<Operation>, 48 - pub(crate) stdin: Option<&'a mut dyn Read>, 49 - pub(crate) stdout: Option<&'a mut dyn Write>, 62 + pub ops: Vec<Operation>, 63 + pub stdin: Option<&'a mut dyn Read>, 64 + pub stdout: Option<&'a mut dyn Write>, 50 65 } 51 66 52 67 impl<'a> Um<'a> { 53 68 /// Initialise a Universal Machine with the specified program scroll. 69 + #[must_use] 54 70 pub fn new(program: Vec<u32>) -> Self { 55 71 let ops = crate::ops::decode(&program); 56 72 Self { ··· 61 77 } 62 78 63 79 /// Sets the output for the universal machine. 80 + #[must_use] 64 81 pub fn stdout<T: Write>(mut self, stdout: &'a mut T) -> Self { 65 82 self.stdout.replace(stdout); 66 83 self 67 84 } 68 85 69 86 /// Sets the input for the universal machine. 87 + #[must_use] 70 88 pub fn stdin<T: Read>(mut self, stdin: &'a mut T) -> Self { 71 89 self.stdin.replace(stdin); 72 90 self 73 91 } 74 92 75 - /// Begins the spin-cycle of the universal machine. 93 + /// Begin the spin-cycle of the Universal Machine. 94 + /// 95 + /// # Panics 96 + /// 97 + /// Panics if the machine encounters an illegal instruction. 98 + /// 76 99 #[inline(never)] 100 + #[allow(clippy::return_self_not_must_use, clippy::must_use_candidate)] 77 101 pub fn run(mut self) -> Self { 78 102 loop { 79 103 // println!( ··· 137 161 // } 138 162 139 163 /// Loads the value from the specified register. 140 - pub(crate) fn load_register(&self, register: Register) -> u32 { 164 + #[must_use] 165 + pub fn load_register(&self, register: Register) -> u32 { 141 166 self.registers[register] 142 167 } 143 168 144 169 /// Saves a value to the specified register. 145 - pub(crate) fn save_register(&mut self, register: Register, value: u32) { 170 + pub fn save_register(&mut self, register: Register, value: u32) { 146 171 self.registers[register] = value; 147 172 } 148 173 149 - pub(crate) fn conditional_move(&mut self, a: Register, b: Register, c: Register) { 174 + pub fn conditional_move(&mut self, a: Register, b: Register, c: Register) { 150 175 if self.load_register(c) != 0 { 151 176 self.save_register(a, self.load_register(b)); 152 177 } 153 178 } 154 179 155 - pub(crate) fn array_index(&mut self, a: Register, b: Register, c: Register) { 180 + pub fn array_index(&mut self, a: Register, b: Register, c: Register) { 156 181 let block = self.load_register(b); 157 182 let offset = self.load_register(c); 158 183 self.save_register(a, self.load_memory(block, offset)); 159 184 } 160 185 161 - pub(crate) fn array_amendment(&mut self, a: Register, b: Register, c: Register) { 186 + pub fn array_amendment(&mut self, a: Register, b: Register, c: Register) { 162 187 let block = self.load_register(a); 163 188 let offset = self.load_register(b); 164 189 let value = self.load_register(c); 165 190 self.store_memory(block, offset, value); 166 191 } 167 192 168 - pub(crate) fn addition(&mut self, a: Register, b: Register, c: Register) { 193 + pub fn addition(&mut self, a: Register, b: Register, c: Register) { 169 194 self.save_register(a, self.load_register(b).wrapping_add(self.load_register(c))); 170 195 } 171 196 172 - pub(crate) fn multiplication(&mut self, a: Register, b: Register, c: Register) { 197 + pub fn multiplication(&mut self, a: Register, b: Register, c: Register) { 173 198 self.save_register(a, self.load_register(b).wrapping_mul(self.load_register(c))); 174 199 } 175 200 176 - pub(crate) fn division(&mut self, a: Register, b: Register, c: Register) { 201 + pub fn division(&mut self, a: Register, b: Register, c: Register) { 177 202 self.save_register(a, self.load_register(b).wrapping_div(self.load_register(c))); 178 203 } 179 204 180 - pub(crate) fn not_and(&mut self, a: Register, b: Register, c: Register) { 205 + pub fn not_and(&mut self, a: Register, b: Register, c: Register) { 181 206 self.save_register(a, !(self.load_register(b) & self.load_register(c))); 182 207 } 183 208 184 - pub(crate) fn allocation(&mut self, b: Register, c: Register) { 209 + pub fn allocation(&mut self, b: Register, c: Register) { 185 210 let length = self.load_register(c); 186 211 let index = self.allocate_memory(length); 187 212 self.save_register(b, index); 188 213 } 189 214 190 - pub(crate) fn abandonment(&mut self, c: Register) { 215 + pub fn abandonment(&mut self, c: Register) { 191 216 let block = self.load_register(c); 192 217 self.free_memory(block); 193 218 } 194 219 195 - pub(crate) fn output(&mut self, c: Register) { 220 + /// Write the value in the specified register to stdout. 221 + /// 222 + /// # Panics 223 + /// 224 + /// Panics if writing to stdout fails. 225 + /// 226 + pub fn output(&mut self, c: Register) { 196 227 let value = self.load_register(c); 197 228 if let Some(stdout) = self.stdout.as_mut() { 198 229 let buffer = [(value & 0xff) as u8]; ··· 200 231 } 201 232 } 202 233 203 - pub(crate) fn input(&mut self, c: Register) { 234 + /// Read a value from stdin into the specifed register. 235 + // 236 + // The `as` cast below benchmarks faster than using u32::from. 237 + #[allow(clippy::cast_lossless)] 238 + pub fn input(&mut self, c: Register) { 204 239 if let Some(stdin) = self.stdin.as_mut() { 205 240 let mut buffer = vec![0]; 206 241 match stdin.read_exact(&mut buffer) { ··· 212 247 } 213 248 } 214 249 215 - pub(crate) fn load_program(&mut self, b: Register, c: Register) { 250 + pub fn load_program(&mut self, b: Register, c: Register) { 216 251 let block = self.load_register(b); 217 252 218 253 // Source array is always copied to array[0], but there ··· 226 261 self.program_counter = self.load_register(c); 227 262 } 228 263 229 - pub(crate) fn orthography(&mut self, a: Register, value: u32) { 264 + pub fn orthography(&mut self, a: Register, value: u32) { 230 265 self.save_register(a, value); 231 266 } 232 267 268 + /// Print the current instruction, program counter, and registers to stderr and quit. 269 + /// 270 + /// # Panics 271 + /// 272 + /// Panics when called. Every time. 273 + /// 233 274 #[cold] 234 275 #[inline(never)] 235 - pub(crate) fn illegal_instruction(&self) -> ! { 276 + pub fn illegal_instruction(&self) -> ! { 236 277 panic!( 237 278 "illegal instruction: {:08x}, pc: {:08x}, r: {:08x?}", 238 279 self.memory[0][self.program_counter.into_index()], ··· 241 282 ) 242 283 } 243 284 244 - pub(crate) fn load_memory(&self, block: u32, offset: u32) -> u32 { 285 + /// Load a value from `offset` in the specified memory `block`. 286 + /// 287 + /// # Panics 288 + /// 289 + /// Panics if the `block` is not an allocated block, or if `offset` overflows the current 290 + /// memory block. 291 + /// 292 + #[must_use] 293 + pub fn load_memory(&self, block: u32, offset: u32) -> u32 { 245 294 let block = block.into_index(); 246 295 let offset = offset.into_index(); 247 296 assert!(block < self.memory.len() && offset < self.memory[block].len()); 248 297 self.memory[block][offset] 249 298 } 250 299 251 - pub(crate) fn store_memory(&mut self, block: u32, offset: u32, value: u32) { 300 + /// Store a `value` at `offset` in the specified memory `block`. 301 + /// 302 + /// # Panics 303 + /// 304 + /// Panics if the `block` is not an allocated block, or if `offset` overflows the current 305 + /// memory block. 306 + /// 307 + pub fn store_memory(&mut self, block: u32, offset: u32, value: u32) { 252 308 let block = block.into_index(); 253 309 let offset = offset.into_index(); 254 310 assert!(block < self.memory.len() && offset < self.memory[block].len()); 255 - self.memory[block][offset] = value 311 + self.memory[block][offset] = value; 256 312 } 257 313 258 314 /// Duplicates a block of memory. 259 315 /// 260 316 /// The block is copied to the first block of memory. 261 - pub(crate) fn duplicate_memory(&mut self, block: u32) -> &[u32] { 317 + /// 318 + /// # Panics 319 + /// 320 + /// Panics if the `block` is not an allocated block of memory. 321 + /// 322 + pub fn duplicate_memory(&mut self, block: u32) -> &[u32] { 262 323 let block = block.into_index(); 263 324 assert!(block < self.memory.len()); 264 325 self.memory[0] = self.memory[block].clone(); ··· 266 327 } 267 328 268 329 /// Allocates a block of memory of the specified length. 269 - pub(crate) fn allocate_memory(&mut self, length: u32) -> u32 { 330 + #[allow(clippy::cast_possible_truncation)] 331 + pub fn allocate_memory(&mut self, length: u32) -> u32 { 270 332 if let Some(index) = self.free_blocks.pop() { 271 333 self.memory[index.into_index()] = Self::new_block(length.into_index()); 272 334 index 273 335 } else { 274 336 self.memory.push(Self::new_block(length.into_index())); 337 + 338 + // The Universal Machine only deals with 32-bit values, so truncation here is 339 + // impossible. 275 340 (self.memory.len() - 1) as u32 276 341 } 277 342 } 278 343 279 - /// Frees a block of memory. 280 - pub(crate) fn free_memory(&mut self, block: u32) { 344 + /// Free a block of memory. 345 + /// 346 + /// # Panics 347 + /// 348 + /// Panics if `block` is not an allocated block of memory. 349 + /// 350 + pub fn free_memory(&mut self, block: u32) { 281 351 assert!(block.into_index() < self.memory.len()); 282 352 self.free_blocks.push(block); 283 353 self.memory[block.into_index()] = Self::new_block(0); 284 354 } 285 355 286 - /// Creates a new block of memory. 356 + /// Create a new block of memory. 287 357 /// 288 358 /// The block is initialised with `len` zeroes. 289 - pub(crate) fn new_block(len: usize) -> SmallVec<[u32; SMALLVEC_SIZE]> { 359 + /// 360 + #[must_use] 361 + pub fn new_block(len: usize) -> SmallVec<[u32; SMALLVEC_SIZE]> { 290 362 smallvec::smallvec![0; len] 291 363 } 292 364 } 293 365 294 366 #[cfg(test)] 295 - pub(crate) mod tests { 367 + mod tests { 296 368 use super::*; 297 369 298 370 #[test] ··· 309 381 #[test] 310 382 #[cfg(feature = "asm")] 311 383 fn hello_world() { 312 - let program = asm::assemble(include_str!("../files/hello-world.asm")); 384 + let program = crate::asm::assemble(include_str!("../files/hello-world.asm")); 313 385 let mut buffer = Vec::new(); 314 386 Um::new(program).stdout(&mut buffer).run(); 315 387 assert_eq!(&buffer, b"Hello, world!\n"); ··· 318 390 #[test] 319 391 #[cfg(feature = "asm")] 320 392 fn cat() { 321 - let program = asm::assemble(include_str!("../files/cat.asm")); 393 + let program = crate::asm::assemble(include_str!("../files/cat.asm")); 322 394 let input = include_bytes!("lib.rs"); 323 395 324 396 let mut reader = std::io::Cursor::new(input);