Skip to content

Commit 76a611f

Browse files
author
DigitalCodeCrafter
committed
added ast validation
1 parent f023387 commit 76a611f

File tree

11 files changed

+841
-198
lines changed

11 files changed

+841
-198
lines changed

Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
name = "ksmc"
33
version = "0.1.0"
44
edition = "2024"
5+
license = "MIT"
56

67
[dependencies]
78
flate2 = "1.1.4"

README.md

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
# ksmc
2+
3+
A Rust-based assembler, disassembler, and compiler for **kOS** - a scripting mod for Kerbal Space Program.
4+
5+
## Features
6+
7+
- **Assembler & Disassembler** for `.ksm` (kOS machine code) binaries
8+
- **Compiler** from high-level code to kerboscript bytecode
9+
10+
## Building
11+
12+
Requires **Rust 1.86+**
13+
14+
```bash
15+
git clone https://github.com/<your-username>/ksmc.git
16+
cd ksmc
17+
cargo build --release
18+
```
19+
20+
## Usage
21+
22+
Assemble a `.ksm` binary:
23+
24+
```bash
25+
./target/release/ksmc input.kasm -o output.ksm
26+
```
27+
28+
Disassemble a `.ksm` binary:
29+
30+
```bash
31+
./target/release/ksmc input.ksm -o output.kasm --dump
32+
```
33+
34+
Look at `src/main.rs` for further information.
35+
36+
## License
37+
38+
Licensed under [MIT License](LICENSE)
39+
40+
## Roadmap
41+
42+
- minimal working compiler
43+
- optimization passes
44+
- compiler safety options
45+
46+
## NOTE
47+
48+
This project is not affiliated with the official [kOS](https://ksp-kos.github.io/KOS/) team.
49+
It’s an independent project for learning and personal interest.

src/compiler.rs

Lines changed: 13 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
// [/] parser
44
// [/] expander
55
// [/] resolver
6-
// [ ] validator
6+
// [x] validator
77
// [ ] type checker
88
// [ ] lowerer
99
// [ ] optimizer
@@ -18,15 +18,18 @@ mod parser;
1818
mod ast;
1919
mod expander;
2020
mod resolver;
21+
mod validator;
22+
mod type_checker;
2123

22-
type CResult<T> = Result<T, CompilerError>;
2324
#[derive(Debug)]
2425
pub enum CompilerError {
2526
Error { text: String },
2627

2728
LexError(Vec<lexer::LexError>),
2829
ParseError(Vec<parser::ParseError>),
2930
ExpandError(expander::ExpandError),
31+
ResolveError(Vec<resolver::ResolveError>),
32+
ValidationError(Vec<validator::ValidationError>),
3033

3134
IoError(std::io::Error),
3235
}
@@ -39,34 +42,28 @@ impl std::fmt::Display for CompilerError {
3942
LexError(err) => write!(f, "Error: {:?}", err),
4043
ParseError(err) => write!(f, "Error: {:?}", err),
4144
ExpandError(err) => write!(f, "Error: {:?}", err),
45+
ResolveError(err) => write!(f, "Error: {:?}", err),
46+
ValidationError(err) => write!(f, "Error: {:?}", err),
4247

4348
IoError(e) => write!(f, "{}", e),
4449
}
4550
}
4651
}
4752
impl std::error::Error for CompilerError {}
4853

49-
trait ToCompileResult<T> {
50-
fn into_cresult(self) -> CResult<T>;
51-
}
52-
5354
pub fn compile(file_path: impl AsRef<Path>) -> Result<(), CompilerError> {
5455
let src = std::fs::read_to_string(&file_path)
5556
.map_err(|e| CompilerError::IoError(e))?;
5657

57-
let tokens = lexer::lex_all(&src).into_cresult()?;
58+
let tokens = lexer::lex_all(&src)?;
5859

59-
let mut ast = parser::parse_tokens(tokens).into_cresult()?;
60+
let mut ast = parser::parse_all(tokens)?;
6061

61-
let mut expander = expander::Expander::new(&mut ast, file_path.as_ref().parent().unwrap(), |src| {
62-
lexer::lex_all(src).into_cresult()
63-
}, |tokens| {
64-
parser::parse_tokens(tokens).into_cresult()
65-
});
66-
expander.expand_modules()?;
62+
expander::expand_all(&mut ast, file_path.as_ref().parent().unwrap())?;
6763

68-
let mut resolver = resolver::Resolver::new(&ast);
69-
let result = resolver.resolve();
64+
let symbols = resolver::resolve_all(&ast)?;
65+
66+
validator::validate_all(&ast)?;
7067

7168
todo!()
7269
}

src/compiler/ast.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ pub enum NodeKind {
2626
ArrayRepeat { value: NodeId, count: NodeId },
2727
UnderscoreExpr,
2828
IndexExpression { array: NodeId , index: NodeId },
29-
TupleIndexExpression { tuple: NodeId , index: i32 },
29+
TupleIndexExpression { tuple: NodeId , index: u32 },
3030
PathExpression { segments: Vec<NodeId> },
3131
ErrorExpr,
3232

@@ -282,7 +282,7 @@ impl Node {
282282

283283
#[derive(Debug, Clone)]
284284
pub enum Literal {
285-
Int(i32),
285+
Int(u32),
286286
Float(f64),
287287
Bool(bool),
288288
Str(String),

src/compiler/expander.rs

Lines changed: 15 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,18 +1,27 @@
11
use std::{collections::HashSet, path::{Path, PathBuf}};
2-
use crate::compiler::{ast::{Node, NodeId, NodeKind, AST}, lexer::Token, CompilerError, ToCompileResult};
2+
use crate::compiler::{ast::{Node, NodeId, NodeKind, AST}, lexer::Token, CompilerError};
33

44
#[derive(Debug)]
55
pub enum ExpandError {
66
NoProgramRoot,
77
NotFound(PathBuf),
88
FileRead(PathBuf, std::io::Error),
99
}
10-
impl<T> ToCompileResult<T> for Result<T, ExpandError> {
11-
fn into_cresult(self) -> Result<T, super::CompilerError> {
12-
self.map_err(|err| CompilerError::ExpandError(err))
10+
impl From<ExpandError> for CompilerError {
11+
fn from(value: ExpandError) -> Self {
12+
CompilerError::ExpandError(value)
1313
}
1414
}
1515

16+
pub fn expand_all(ast: &mut AST, base_path: &Path) -> Result<(), CompilerError> {
17+
let mut expander = Expander::new(ast, base_path, |src| {
18+
super::lexer::lex_all(src).map_err(|e| e.into())
19+
}, |tokens| {
20+
super::parser::parse_all(tokens).map_err(|e| e.into())
21+
});
22+
expander.expand_modules()
23+
}
24+
1625
pub struct Expander<'a> {
1726
pub ast: &'a mut AST,
1827
pub base_path: &'a Path,
@@ -70,7 +79,7 @@ impl<'a> Expander<'a> {
7079

7180
fn expand_module_file(&mut self, node_id: NodeId, path: &Path) -> Result<(), CompilerError> {
7281
if !path.exists() {
73-
return Err(ExpandError::NotFound(path.to_path_buf())).into_cresult();
82+
return Err(ExpandError::NotFound(path.to_path_buf()).into());
7483
}
7584

7685
// already expanded
@@ -81,7 +90,7 @@ impl<'a> Expander<'a> {
8190

8291
// read and parse
8392
let src = std::fs::read_to_string(path)
84-
.map_err(|e| ExpandError::FileRead(path.to_path_buf(), e)).into_cresult()?;
93+
.map_err(|e| ExpandError::FileRead(path.to_path_buf(), e))?;
8594

8695
let tokens = (self.lexer)(&src)?;
8796
let mut parsed_ast = (self.parser)(tokens)?;

src/compiler/lexer.rs

Lines changed: 38 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
// Source text -> Tokens
2-
use crate::compiler::{ast::{Pos, Span}, CompilerError, ToCompileResult};
2+
use crate::compiler::{ast::{Pos, Span}, CompilerError};
33

44
const CASE_SENSITIVITY: bool = true;
55

@@ -8,18 +8,17 @@ pub struct LexError {
88
pub span: Span,
99
pub message: String,
1010
}
11-
12-
impl<T> ToCompileResult<T> for Result<T, Vec<LexError>> {
13-
fn into_cresult(self) -> Result<T, super::CompilerError> {
14-
self.map_err(|err| CompilerError::LexError(err))
11+
impl From<Vec<LexError>> for CompilerError {
12+
fn from(value: Vec<LexError>) -> Self {
13+
CompilerError::LexError(value)
1514
}
1615
}
1716

1817
#[derive(Debug, Clone, PartialEq)]
1918
pub enum TokenKind {
2019
// Identifiers and Literals
2120
Identifier(String),
22-
Int(i32),
21+
Int(u32),
2322
Float(f64),
2423
String(String),
2524

@@ -90,7 +89,11 @@ pub struct Token {
9089

9190
pub fn lex_all(src: &str) -> Result<Vec<Token>, Vec<LexError>> {
9291
let mut lexer = Lexer::new(&src);
93-
lexer.lex_all()
92+
if lexer.lex_all().is_ok() {
93+
Ok(lexer.tokens)
94+
} else {
95+
Err(lexer.errors)
96+
}
9497
}
9598

9699
pub struct Lexer {
@@ -99,6 +102,7 @@ pub struct Lexer {
99102
errors: Vec<LexError>,
100103
line: usize,
101104
col: usize,
105+
pub tokens: Vec<Token>
102106
}
103107
impl Lexer {
104108
pub fn new(input: &str) -> Self {
@@ -108,27 +112,25 @@ impl Lexer {
108112
errors: Vec::new(),
109113
line: 1,
110114
col: 1,
115+
tokens: Vec::new(),
111116
}
112117
}
113118

114-
pub fn lex_all(&mut self) -> Result<Vec<Token>, Vec<LexError>> {
115-
let mut tokens = Vec::new();
119+
pub fn lex_all(&mut self) -> Result<(), &[LexError]> {
116120
loop {
117-
let tok = self.next_token();
118-
if matches!(tok.kind, TokenKind::EOF) {
119-
tokens.push(tok); break;
120-
} else {
121-
tokens.push(tok);
121+
self.next_token();
122+
if matches!(self.tokens.last(), Some(Token { kind: TokenKind::EOF, .. })) {
123+
break;
122124
}
123125
}
124126
if self.errors.is_empty() {
125-
Ok(tokens)
127+
Ok(())
126128
} else {
127-
Err(self.errors.clone())
129+
Err(&self.errors)
128130
}
129131
}
130132

131-
pub fn next_token(&mut self) -> Token {
133+
fn next_token(&mut self) {
132134
self.skip_whitespace_and_comment();
133135

134136
let start_line = self.line;
@@ -139,22 +141,20 @@ impl Lexer {
139141
};
140142

141143
if c.is_alphabetic() {
142-
self.lex_identifier_or_keyword(start_line, start_col)
144+
self.lex_identifier_or_keyword(start_line, start_col);
143145
} else if c.is_ascii_digit() {
144-
self.lex_number(start_line, start_col)
146+
self.lex_number(start_line, start_col);
145147
} else if c == '.' && self.peek_ahead(1).map_or(false, |n| n.is_ascii_digit()) {
146-
self.lex_number(start_line, start_col)
147-
} else if c == '-' && self.peek_ahead(1).map_or(false, |n| n.is_ascii_digit()) {
148-
self.lex_number(start_line, start_col)
148+
self.lex_number(start_line, start_col);
149149
} else if c == '"' {
150-
self.lex_string(start_line, start_col)
150+
self.lex_string(start_line, start_col);
151151
} else {
152-
self.lex_symbol(start_line, start_col)
152+
self.lex_symbol(start_line, start_col);
153153
}
154154
}
155155

156-
fn make_token(&self, kind: TokenKind, line: usize, col: usize) -> Token {
157-
Token { kind, span: self.make_span(line, col) }
156+
fn make_token(&mut self, kind: TokenKind, line: usize, col: usize) {
157+
self.tokens.push(Token { kind, span: self.make_span(line, col) });
158158
}
159159

160160
fn make_span(&self, line: usize, col: usize) -> Span {
@@ -232,7 +232,7 @@ impl Lexer {
232232

233233
// --------- Identifiers & Keywords ---------
234234

235-
fn lex_identifier_or_keyword(&mut self, line: usize, col: usize) -> Token {
235+
fn lex_identifier_or_keyword(&mut self, line: usize, col: usize) {
236236
let mut s = String::new();
237237
while let Some(c) = self.peek() {
238238
if c.is_alphanumeric() || c == '_' {
@@ -271,7 +271,7 @@ impl Lexer {
271271

272272
// --------- Numbers ---------
273273

274-
fn lex_number(&mut self, line: usize, col: usize) -> Token {
274+
fn lex_number(&mut self, line: usize, col: usize) {
275275
let mut num_str = String::new();
276276
let mut has_dot = false;
277277
let mut has_exp = false;
@@ -338,7 +338,7 @@ impl Lexer {
338338
self.make_token(kind, line, col)
339339
}
340340

341-
fn lex_based_number(&mut self, base: u32,line: usize, col: usize) -> Token {
341+
fn lex_based_number(&mut self, base: u32,line: usize, col: usize) {
342342
let mut s = String::new();
343343
while let Some(c) = self.peek() {
344344
match c {
@@ -363,13 +363,13 @@ impl Lexer {
363363
}
364364

365365
let cleaned = s.replace("_", "");
366-
let value = i32::from_str_radix(&cleaned, base).unwrap_or(0);
366+
let value = u32::from_str_radix(&cleaned, base).unwrap_or(0);
367367
self.make_token(TokenKind::Int(value), line, col)
368368
}
369369

370370
// --------- Strings ---------
371371

372-
fn lex_string(&mut self, line: usize, col: usize) -> Token {
372+
fn lex_string(&mut self, line: usize, col: usize) {
373373
self.advance(); // consume '"'
374374
let mut s = String::new();
375375

@@ -403,7 +403,7 @@ impl Lexer {
403403

404404
// --------- Symbols & Operators ---------
405405

406-
fn lex_symbol(&mut self, line: usize, col: usize) -> Token {
406+
fn lex_symbol(&mut self, line: usize, col: usize) {
407407
use TokenKind::*;
408408
let c = self.advance().unwrap();
409409

@@ -478,7 +478,8 @@ mod tests {
478478
let mut lexer = Lexer::new(src);
479479
let mut i = 0;
480480
loop {
481-
let tok = lexer.next_token();
481+
lexer.next_token();
482+
let tok = lexer.tokens.last().unwrap();
482483
println!("{:?}", tok);
483484
assert_eq!(tok.kind, expected[i]);
484485
if matches!(tok.kind, TokenKind::EOF) { break; }
@@ -500,7 +501,8 @@ mod tests {
500501
let mut lexer = Lexer::new(src);
501502
let mut i = 0;
502503
loop {
503-
let tok = lexer.next_token();
504+
lexer.next_token();
505+
let tok = lexer.tokens.last().unwrap();
504506
println!("{:?}", tok);
505507
assert_eq!(tok.kind, expected[i]);
506508
if matches!(tok.kind, TokenKind::EOF) { break; }
@@ -521,7 +523,8 @@ mod tests {
521523
let mut lexer = Lexer::new(src);
522524
let mut i = 0;
523525
loop {
524-
let tok = lexer.next_token();
526+
lexer.next_token();
527+
let tok = lexer.tokens.last().unwrap();
525528
println!("{:?}", tok);
526529
assert_eq!(tok.kind, expected[i]);
527530
if matches!(tok.kind, TokenKind::EOF) { break; }

0 commit comments

Comments
 (0)