diff --git a/README.md b/README.md index 16bd83a..6244a3b 100644 --- a/README.md +++ b/README.md @@ -17,20 +17,22 @@ The implementation is similar to the concepts in [metalanguage](https://github.c ## Implementations -Meta-notation is available in multiple languages: +Meta-notation is available in multiple languages with identical behavior: - **[JavaScript/TypeScript](./js)** - Full-featured implementation with PEG.js grammar - **[Rust](./rust)** - High-performance implementation with serde support +Both implementations produce the same parsed object structure and pass the same test cases. + ## Features - **Universal Delimiter Parsing**: Parses `()`, `{}`, `[]`, `''`, `""`, `` ` ` `` - **Language Agnostic**: Works with 25+ programming languages and all natural languages - **Nested Structures**: Supports arbitrary nesting of delimiters - **Round-trip Serialization**: Parse and serialize back to original text -- **Multiple Language Implementations**: JavaScript/TypeScript and Rust +- **Multiple Language Implementations**: JavaScript/TypeScript and Rust with identical output - **Simple Grammar**: Clean, efficient parsing -- **Comprehensive Tests**: 81+ test cases for programming and natural languages +- **Comprehensive Tests**: 170+ test cases across both implementations covering programming and natural languages ## Installation @@ -73,11 +75,16 @@ assert_eq!(serialized, code); ## API -### `parse(input: string): Sequence` +### `parse(input) -> Sequence` + +Parses text into a sequence of blocks. Each block has a `type` and `content`. -Parses text into a sequence of blocks. 
+- **Bracket delimiters** (`paren`, `curly`, `square`): `content` is a nested array of blocks +- **Quote delimiters** (`singleQuote`, `doubleQuote`, `backtick`): `content` is a plain string (no nested parsing inside quotes) +- **Plain text** (`text`): `content` is a string ```typescript +// JavaScript/TypeScript const result = parse('hello (world) {test}'); // Returns: // [ @@ -88,11 +95,108 @@ const result = parse('hello (world) {test}'); // ] ``` -### `serialize(sequence: Sequence): string` +```rust +// Rust +let result = parse("hello (world) {test}"); +// Returns: +// [ +// Block::Text("hello "), +// Block::Paren([Block::Text("world")]), +// Block::Text(" "), +// Block::Curly([Block::Text("test")]) +// ] +``` + +#### Nested Structures + +Bracket delimiters can be nested arbitrarily: + +```typescript +const result = parse('{a [b (c) d] e}'); +// Returns: +// [ +// { type: 'curly', content: [ +// { type: 'text', content: 'a ' }, +// { type: 'square', content: [ +// { type: 'text', content: 'b ' }, +// { type: 'paren', content: [{ type: 'text', content: 'c' }] }, +// { type: 'text', content: ' d' } +// ]}, +// { type: 'text', content: ' e' } +// ]} +// ] +``` + +#### Quotes + +Quotes capture their content as a plain string without further parsing: + +```typescript +const result = parse('"hello {world}"'); +// Returns: +// [ +// { type: 'doubleQuote', content: 'hello {world}' } +// ] +// Note: {world} is NOT parsed as a curly block inside quotes +``` + +#### Real-World Examples + +**JavaScript code:** +```typescript +const result = parse('const greet = (name) => { return `Hello, ${name}!`; };'); +// Returns: +// [ +// { type: 'text', content: 'const greet = ' }, +// { type: 'paren', content: [{ type: 'text', content: 'name' }] }, +// { type: 'text', content: ' => ' }, +// { type: 'curly', content: [ +// { type: 'text', content: ' return ' }, +// { type: 'backtick', content: 'Hello, ${name}!' 
}, +// { type: 'text', content: '; ' } +// ]}, +// { type: 'text', content: ';' } +// ] +``` + +**JSON:** +```typescript +const result = parse('{"name": "John", "tags": ["dev", "admin"]}'); +// Returns: +// [ +// { type: 'curly', content: [ +// { type: 'doubleQuote', content: 'name' }, +// { type: 'text', content: ': ' }, +// { type: 'doubleQuote', content: 'John' }, +// { type: 'text', content: ', ' }, +// { type: 'doubleQuote', content: 'tags' }, +// { type: 'text', content: ': ' }, +// { type: 'square', content: [ +// { type: 'doubleQuote', content: 'dev' }, +// { type: 'text', content: ', ' }, +// { type: 'doubleQuote', content: 'admin' } +// ]} +// ]} +// ] +``` + +**Natural language:** +```typescript +const result = parse('She said, "Hello, world!" and smiled.'); +// Returns: +// [ +// { type: 'text', content: 'She said, ' }, +// { type: 'doubleQuote', content: 'Hello, world!' }, +// { type: 'text', content: ' and smiled.' } +// ] +``` + +### `serialize(sequence) -> string` Converts a sequence of blocks back to text. 
```typescript +// JavaScript/TypeScript const blocks = [ { type: 'text', content: 'hello ' }, { type: 'paren', content: [{ type: 'text', content: 'world' }] } @@ -101,50 +205,87 @@ const text = serialize(blocks); // Returns: "hello (world)" ``` +```rust +// Rust +let blocks = vec![ + Block::Text("hello ".to_string()), + Block::Paren(vec![Block::Text("world".to_string())]), +]; +let text = serialize(&blocks); +// Returns: "hello (world)" +``` + ## Types +### JavaScript/TypeScript + ```typescript type DelimiterType = 'paren' | 'curly' | 'square' | 'singleQuote' | 'doubleQuote' | 'backtick' | 'text'; interface Block { type: DelimiterType; - content: Block[] | string; + content: Block[] | string; // Block[] for brackets, string for quotes and text } type Sequence = Block[]; ``` +### Rust + +```rust +pub enum Block { + Paren(Vec<Block>), // () - content is nested blocks + Curly(Vec<Block>), // {} - content is nested blocks + Square(Vec<Block>), // [] - content is nested blocks + SingleQuote(String), // '' - content is a plain string + DoubleQuote(String), // "" - content is a plain string + Backtick(String), // `` - content is a plain string + Text(String), // plain text +} +``` + +The Rust `Block` enum uses serde's `#[serde(tag = "type", content = "content", rename_all = "camelCase")]` attribute, so it serializes to the same JSON structure as the JavaScript implementation: + +```json +[ + { "type": "text", "content": "hello " }, + { "type": "paren", "content": [{ "type": "text", "content": "world" }] } +] +``` + ## Language Support Meta-notation works seamlessly with both programming languages and natural languages. 
### Programming Languages (Tested) -- **JavaScript/TypeScript** - Functions, arrow functions, template literals -- **Python** - Dictionaries, lists, function definitions -- **Go** - Functions, print statements -- **Rust** - Vectors, macros, format strings -- **C++** - Streams, functions, return statements -- **Java** - Classes, methods, arrays -- **C#** - LINQ, collections, generics -- **Ruby** - Methods, string interpolation -- **PHP** - Functions, arrays, associative arrays -- **Swift** - Functions, string interpolation -- **Kotlin** - Functions, lists -- **Scala** - Functions, type annotations -- **Perl** - Subroutines, arrays -- **Haskell** - Pure functions -- **Lisp/Scheme** - S-expressions -- **Clojure** - Vectors, strings -- **Lua** - Functions, string concatenation -- **Elixir** - Functions, string interpolation -- **R** - Functions, paste -- **MATLAB** - Functions -- **SQL** - SELECT statements with WHERE clauses -- **JSON** - Objects and arrays -- **YAML** - Arrays (with bracket syntax) -- **Bash/Shell** - Echo, variables, pipes -- **Markdown** - Code blocks with backticks +| Language | Delimiters Used | Example | +|----------|----------------|---------| +| JavaScript/TypeScript | `() {} \`\`` | `const greet = (name) => { return \`Hello\`; };` | +| Python | `() {} [] ""` | `def calc(x): return {"sum": x, "list": [x]}` | +| Go | `() {} ""` | `func main() { fmt.Println("Hello") }` | +| Rust | `() {} [] ""` | `fn main() { let x = vec![1]; println!("{}", x); }` | +| C++ | `() {} ""` | `int main() { std::cout << "Hello"; }` | +| Java | `() {} [] ""` | `class Main { void main(String[] args) {} }` | +| C# | `() {} ""` | `void Test() { Console.WriteLine("Done"); }` | +| Ruby | `() ""` | `def greet(name); puts "Hello"; end` | +| PHP | `() {} [] ""` | `function test($x) { return ["key" => "val"]; }` | +| Swift | `() {} ""` | `func greet(name: String) { return "Hello" }` | +| Kotlin | `() {} ""` | `fun main() { println("Hello") }` | +| Scala | `() {}` | `def add(x: 
Int, y: Int): Int = { x + y }` | +| Perl | `() {} ""` | `sub greet { print "Hello\n"; }` | +| Haskell | `""` | `main = putStrLn "Hello, World!"` | +| Lisp/Scheme | `()` | `(define (factorial n) (if (= n 0) 1 (* n (factorial (- n 1)))))` | +| Clojure | `() [] ""` | `(defn greet [name] (str "Hello"))` | +| Lua | `() ""` | `function greet(name) return "Hello" end` | +| Elixir | `() ""` | `def greet(name), do: "Hello"` | +| R | `() {} ""` | `greet <- function(name) { paste("Hello") }` | +| MATLAB | `()` | `function y = square(x); y = x .^ 2; end` | +| SQL | `""` | `SELECT name FROM users WHERE status = "active"` | +| JSON | `{} [] ""` | `{"name": "John", "tags": ["dev"]}` | +| YAML | `[] ""` | `dependencies: ["react", "typescript"]` | +| Bash/Shell | `""` | `echo "Hello, ${USER}!" \| grep "Hello"` | +| Markdown | `` \`\` `` | ``Here is code: `const x = 1;` in backticks.`` | ### Natural Languages (Tested) @@ -156,13 +297,14 @@ Meta-notation parses natural language text including: - **Academic writing** with nested structures - **Legal text** with section references - **Technical documentation** mixing code and prose -- **Multiple languages**: English, Spanish, French, German, Italian, Portuguese, and more +- **Mathematical expressions**: `f(x) = [a + b] * {c - d}` +- **Multiple languages**: English, Spanish, French, German, Italian, Portuguese, Russian, Japanese, Chinese, and more Works with any language that uses these common delimiters for structure. ## Examples -See the [examples](./src/examples) directory for more detailed usage examples. +See the [examples](./js/src/examples) directory for more detailed usage examples. ## Building @@ -190,6 +332,8 @@ cd js npm test ``` +81 test cases covering parser, serializer, programming languages, and natural languages. + ### Rust ```bash @@ -197,6 +341,14 @@ cd rust cargo test ``` +92 test cases covering the same scenarios plus dedicated parser and serializer unit tests. 
+ +Both implementations verify: +- Exact parsed object structure matches expected output +- Round-trip serialization preserves original text +- All delimiter types are correctly identified +- Nested structures are handled correctly + ## Comparison with Links-Notation | Feature | meta-notation | links-notation | diff --git a/experiments/verify_json_equivalence.rs b/experiments/verify_json_equivalence.rs new file mode 100644 index 0000000..a7c38c1 --- /dev/null +++ b/experiments/verify_json_equivalence.rs @@ -0,0 +1,29 @@ +// Experiment: Verify that Rust serde JSON output matches JS parsed object structure +// Run with: cd /tmp/gh-issue-solver-1774090289115 && cargo test --test verify_json_equivalence -- --nocapture + +use meta_notation::{parse, Block}; + +fn main() { + let test_cases = vec![ + "hello world", + "(hello)", + "{world}", + "[test]", + "'hello'", + "\"world\"", + "`code`", + "hello (world) {test}", + "(a (b) c)", + "{a [b (c) d] e}", + "(){}[]", + "\"hello {world}\"", + ]; + + for input in test_cases { + let result = parse(input); + let json = serde_json::to_string_pretty(&result).unwrap(); + println!("Input: {:?}", input); + println!("JSON: {}", json); + println!("---"); + } +} diff --git a/experiments/verify_json_equivalence_test.rs b/experiments/verify_json_equivalence_test.rs new file mode 100644 index 0000000..b00a722 --- /dev/null +++ b/experiments/verify_json_equivalence_test.rs @@ -0,0 +1,27 @@ +use meta_notation::parse; + +#[test] +fn verify_json_equivalence() { + let test_cases = vec![ + "hello world", + "(hello)", + "{world}", + "[test]", + "'hello'", + "\"world\"", + "`code`", + "hello (world) {test}", + "(a (b) c)", + "{a [b (c) d] e}", + "(){}[]", + "\"hello {world}\"", + ]; + + for input in test_cases { + let result = parse(input); + let json = serde_json::to_string_pretty(&result).unwrap(); + println!("Input: {:?}", input); + println!("JSON: {}", json); + println!("---"); + } +} diff --git a/js/README.md b/js/README.md index 
44ac5bf..1ea3150 100644 --- a/js/README.md +++ b/js/README.md @@ -26,12 +26,105 @@ console.log(serialized === code); // true ### `parse(input: string): Sequence` -Parses text into a sequence of blocks. +Parses text into a sequence of blocks. Each block has a `type` (the delimiter kind) and `content` (either a nested array of blocks for bracket delimiters, or a plain string for quotes and text). + +```typescript +const result = parse('hello (world) {test}'); +// Returns: +// [ +// { type: 'text', content: 'hello ' }, +// { type: 'paren', content: [{ type: 'text', content: 'world' }] }, +// { type: 'text', content: ' ' }, +// { type: 'curly', content: [{ type: 'text', content: 'test' }] } +// ] +``` + +Nested structures are supported: + +```typescript +const result = parse('{a [b (c) d] e}'); +// Returns: +// [ +// { type: 'curly', content: [ +// { type: 'text', content: 'a ' }, +// { type: 'square', content: [ +// { type: 'text', content: 'b ' }, +// { type: 'paren', content: [{ type: 'text', content: 'c' }] }, +// { type: 'text', content: ' d' } +// ]}, +// { type: 'text', content: ' e' } +// ]} +// ] +``` + +Quotes capture content as a plain string (no nested parsing): + +```typescript +const result = parse('"hello {world}"'); +// Returns: +// [ +// { type: 'doubleQuote', content: 'hello {world}' } +// ] +``` ### `serialize(sequence: Sequence): string` Converts a sequence of blocks back to text. 
+```typescript +const blocks = [ + { type: 'text', content: 'hello ' }, + { type: 'paren', content: [{ type: 'text', content: 'world' }] } +]; +const text = serialize(blocks); +// Returns: "hello (world)" +``` + +### Class-based API + +```typescript +import { MetaNotationParser, MetaNotationSerializer } from 'meta-notation'; + +const parser = new MetaNotationParser(); +const serializer = new MetaNotationSerializer(); + +const parsed = parser.parse('hello (world)'); +const text = serializer.serialize(parsed); +``` + +## Types + +```typescript +type DelimiterType = 'paren' | 'curly' | 'square' | 'singleQuote' | 'doubleQuote' | 'backtick' | 'text'; + +interface Block { + type: DelimiterType; + content: Block[] | string; // Block[] for brackets, string for quotes and text +} + +type Sequence = Block[]; + +interface Parser { + parse(input: string): Sequence; +} + +interface Serializer { + serialize(sequence: Sequence): string; +} +``` + +### Content types by delimiter + +| Delimiter | Type Name | Content Type | Example | +|-----------|-----------|-------------|---------| +| `()` | `paren` | `Block[]` | `{ type: 'paren', content: [{ type: 'text', content: 'x' }] }` | +| `{}` | `curly` | `Block[]` | `{ type: 'curly', content: [{ type: 'text', content: 'x' }] }` | +| `[]` | `square` | `Block[]` | `{ type: 'square', content: [{ type: 'text', content: 'x' }] }` | +| `''` | `singleQuote` | `string` | `{ type: 'singleQuote', content: 'hello' }` | +| `""` | `doubleQuote` | `string` | `{ type: 'doubleQuote', content: 'hello' }` | +| `` `` `` | `backtick` | `string` | `{ type: 'backtick', content: 'hello' }` | +| plain text | `text` | `string` | `{ type: 'text', content: 'hello' }` | + ## Building ```bash @@ -45,13 +138,20 @@ npm run build npm test ``` +81 test cases covering: +- **Parser tests**: Each delimiter type, mixed delimiters, nested structures, empty delimiters, quotes with special characters, JavaScript/Python/JSON-like code structures, with exact expected parsed 
object verification +- **Serializer tests**: Serialization of each delimiter type, round-trip consistency +- **Programming language tests**: 25+ languages with delimiter type and round-trip verification, key tests include full expected parsed object structure +- **Natural language tests**: 25 tests for English, Spanish, French, German, Italian, Portuguese, Russian, Japanese, Chinese text, academic citations, mathematical expressions, legal text, and more + ## Features - **Universal Delimiter Parsing**: Parses `()`, `{}`, `[]`, `''`, `""`, `` ` ` `` - **Language Agnostic**: Works with 25+ programming languages and all natural languages - **Nested Structures**: Supports arbitrary nesting of delimiters - **Round-trip Serialization**: Parse and serialize back to original text -- **TypeScript Support**: Fully typed API -- **81 Test Cases**: Comprehensive test coverage +- **TypeScript Support**: Fully typed API with exported interfaces +- **PEG.js Grammar**: Clean, maintainable grammar definition +- **Identical Output**: Produces the same parsed structure as the Rust implementation See the [main README](../README.md) for more information. diff --git a/js/tests/languages.test.ts b/js/tests/languages.test.ts index c0945a2..cfbe78b 100644 --- a/js/tests/languages.test.ts +++ b/js/tests/languages.test.ts @@ -26,7 +26,20 @@ function hasDelimiterType(sequence: Sequence, type: DelimiterType): boolean { test('parse JavaScript code', () => { const code = 'const greet = (name) => { return `Hello, ${name}!`; };'; const result = parse(code); - assert.ok(result.length > 0); + + // Verify exact parsed structure + assert.deepEqual(result, [ + { type: 'text', content: 'const greet = ' }, + { type: 'paren', content: [{ type: 'text', content: 'name' }] }, + { type: 'text', content: ' => ' }, + { type: 'curly', content: [ + { type: 'text', content: ' return ' }, + { type: 'backtick', content: 'Hello, ${name}!' 
}, + { type: 'text', content: '; ' }, + ]}, + { type: 'text', content: ';' }, + ]); + assert.ok(hasDelimiterType(result, 'paren')); assert.ok(hasDelimiterType(result, 'curly')); assert.ok(hasDelimiterType(result, 'backtick')); @@ -38,6 +51,21 @@ test('parse JavaScript code', () => { test('parse Python code', () => { const code = 'def calculate(x, y): return {"sum": x + y, "list": [x, y]}'; const result = parse(code); + + // Verify exact parsed structure + assert.deepEqual(result, [ + { type: 'text', content: 'def calculate' }, + { type: 'paren', content: [{ type: 'text', content: 'x, y' }] }, + { type: 'text', content: ': return ' }, + { type: 'curly', content: [ + { type: 'doubleQuote', content: 'sum' }, + { type: 'text', content: ': x + y, ' }, + { type: 'doubleQuote', content: 'list' }, + { type: 'text', content: ': ' }, + { type: 'square', content: [{ type: 'text', content: 'x, y' }] }, + ]}, + ]); + assert.ok(hasDelimiterType(result, 'paren')); assert.ok(hasDelimiterType(result, 'curly')); assert.ok(hasDelimiterType(result, 'square')); @@ -49,6 +77,19 @@ test('parse Python code', () => { test('parse Go code', () => { const code = 'func main() { fmt.Println("Hello, World!") }'; const result = parse(code); + + // Verify exact parsed structure + assert.deepEqual(result, [ + { type: 'text', content: 'func main' }, + { type: 'paren', content: [] }, + { type: 'text', content: ' ' }, + { type: 'curly', content: [ + { type: 'text', content: ' fmt.Println' }, + { type: 'paren', content: [{ type: 'doubleQuote', content: 'Hello, World!' 
}] }, + { type: 'text', content: ' ' }, + ]}, + ]); + assert.ok(hasDelimiterType(result, 'paren')); assert.ok(hasDelimiterType(result, 'curly')); assert.ok(hasDelimiterType(result, 'doubleQuote')); @@ -239,6 +280,26 @@ test('parse SQL code', () => { test('parse JSON', () => { const code = '{"name": "John", "age": 30, "tags": ["developer", "designer"]}'; const result = parse(code); + + // Verify exact parsed structure + assert.deepEqual(result, [ + { type: 'curly', content: [ + { type: 'doubleQuote', content: 'name' }, + { type: 'text', content: ': ' }, + { type: 'doubleQuote', content: 'John' }, + { type: 'text', content: ', ' }, + { type: 'doubleQuote', content: 'age' }, + { type: 'text', content: ': 30, ' }, + { type: 'doubleQuote', content: 'tags' }, + { type: 'text', content: ': ' }, + { type: 'square', content: [ + { type: 'doubleQuote', content: 'developer' }, + { type: 'text', content: ', ' }, + { type: 'doubleQuote', content: 'designer' }, + ]}, + ]}, + ]); + assert.ok(hasDelimiterType(result, 'curly')); assert.ok(hasDelimiterType(result, 'square')); assert.ok(hasDelimiterType(result, 'doubleQuote')); diff --git a/js/tests/natural-languages.test.ts b/js/tests/natural-languages.test.ts index 4becda2..ee9a5f5 100644 --- a/js/tests/natural-languages.test.ts +++ b/js/tests/natural-languages.test.ts @@ -30,6 +30,14 @@ function hasDelimiterType(sequence: Sequence, type: DelimiterType): boolean { test('parse English text with quotes', () => { const text = 'She said, "Hello, world!" and smiled.'; const result = parse(text); + + // Verify exact parsed structure + assert.deepEqual(result, [ + { type: 'text', content: 'She said, ' }, + { type: 'doubleQuote', content: 'Hello, world!' }, + { type: 'text', content: ' and smiled.' 
}, + ]); + assert.ok(hasDelimiterType(result, 'doubleQuote')); assert.equal(serialize(result), text); }); @@ -37,6 +45,14 @@ test('parse English text with quotes', () => { test('parse English text with parentheses', () => { const text = 'The conference (scheduled for next week) will be online.'; const result = parse(text); + + // Verify exact parsed structure + assert.deepEqual(result, [ + { type: 'text', content: 'The conference ' }, + { type: 'paren', content: [{ type: 'text', content: 'scheduled for next week' }] }, + { type: 'text', content: ' will be online.' }, + ]); + assert.ok(hasDelimiterType(result, 'paren')); assert.equal(serialize(result), text); }); @@ -44,6 +60,14 @@ test('parse English text with parentheses', () => { test('parse English text with brackets', () => { const text = 'According to the report [see page 42], the results were positive.'; const result = parse(text); + + // Verify exact parsed structure + assert.deepEqual(result, [ + { type: 'text', content: 'According to the report ' }, + { type: 'square', content: [{ type: 'text', content: 'see page 42' }] }, + { type: 'text', content: ', the results were positive.' }, + ]); + assert.ok(hasDelimiterType(result, 'square')); assert.equal(serialize(result), text); }); @@ -151,6 +175,16 @@ test('parse Chinese text (Pinyin) with quotes', () => { test('parse academic text with citations', () => { const text = 'The study [Smith et al., 2020] found that performance (measured in ms) improved.'; const result = parse(text); + + // Verify exact parsed structure + assert.deepEqual(result, [ + { type: 'text', content: 'The study ' }, + { type: 'square', content: [{ type: 'text', content: 'Smith et al., 2020' }] }, + { type: 'text', content: ' found that performance ' }, + { type: 'paren', content: [{ type: 'text', content: 'measured in ms' }] }, + { type: 'text', content: ' improved.' 
}, + ]); + assert.ok(hasDelimiterType(result, 'square')); assert.ok(hasDelimiterType(result, 'paren')); assert.equal(serialize(result), text); @@ -160,6 +194,18 @@ test('parse academic text with citations', () => { test('parse mathematical text', () => { const text = 'The formula is f(x) = [a + b] * {c - d}.'; const result = parse(text); + + // Verify exact parsed structure + assert.deepEqual(result, [ + { type: 'text', content: 'The formula is f' }, + { type: 'paren', content: [{ type: 'text', content: 'x' }] }, + { type: 'text', content: ' = ' }, + { type: 'square', content: [{ type: 'text', content: 'a + b' }] }, + { type: 'text', content: ' * ' }, + { type: 'curly', content: [{ type: 'text', content: 'c - d' }] }, + { type: 'text', content: '.' }, + ]); + assert.ok(hasDelimiterType(result, 'paren')); assert.ok(hasDelimiterType(result, 'square')); assert.ok(hasDelimiterType(result, 'curly')); diff --git a/rust/README.md b/rust/README.md index 9bbcf8e..38fcb27 100644 --- a/rust/README.md +++ b/rust/README.md @@ -33,24 +33,126 @@ fn main() { Parses text into a sequence of blocks. 
+- **Bracket delimiters** (`Paren`, `Curly`, `Square`): contain `Vec<Block>` (nested blocks) +- **Quote delimiters** (`SingleQuote`, `DoubleQuote`, `Backtick`): contain `String` (no nested parsing inside quotes) +- **Plain text** (`Text`): contains `String` + +```rust +let result = parse("hello (world) {test}"); +// Returns: +// [ +// Block::Text("hello "), +// Block::Paren([Block::Text("world")]), +// Block::Text(" "), +// Block::Curly([Block::Text("test")]) +// ] +``` + +Nested structures are supported: + +```rust +let result = parse("{a [b (c) d] e}"); +// Returns: +// [ +// Block::Curly([ +// Block::Text("a "), +// Block::Square([ +// Block::Text("b "), +// Block::Paren([Block::Text("c")]), +// Block::Text(" d") +// ]), +// Block::Text(" e") +// ]) +// ] +``` + +Quotes capture content as a plain string (no nested parsing): + +```rust +let result = parse("\"hello {world}\""); +// Returns: +// [ +// Block::DoubleQuote("hello {world}") +// ] +``` + ### `serialize(blocks: &[Block]) -> String` Converts a sequence of blocks back to text. 
+```rust +let blocks = vec![ + Block::Text("hello ".to_string()), + Block::Paren(vec![Block::Text("world".to_string())]), +]; +let text = serialize(&blocks); +assert_eq!(text, "hello (world)"); +``` + +### Block Methods + +```rust +// Get the delimiter type of a block +let block = Block::Paren(vec![]); +assert_eq!(block.delimiter_type(), DelimiterType::Paren); + +// Check if a block or its children contain a specific delimiter type (recursive) +let result = parse("(a {b} c)"); +assert!(result[0].has_delimiter_type(&DelimiterType::Curly)); +``` + ## Types ```rust pub enum Block { - Paren(Vec<Block>), - Curly(Vec<Block>), - Square(Vec<Block>), - SingleQuote(String), - DoubleQuote(String), - Backtick(String), - Text(String), + Paren(Vec<Block>), // () - content is nested blocks + Curly(Vec<Block>), // {} - content is nested blocks + Square(Vec<Block>), // [] - content is nested blocks + SingleQuote(String), // '' - content is a plain string + DoubleQuote(String), // "" - content is a plain string + Backtick(String), // `` - content is a plain string + Text(String), // plain text +} + +pub enum DelimiterType { + Paren, + Curly, + Square, + SingleQuote, + DoubleQuote, + Backtick, + Text, } ``` +### Content types by delimiter + +| Delimiter | Variant | Content Type | Example | +|-----------|---------|-------------|---------| +| `()` | `Block::Paren(...)` | `Vec<Block>` | `Block::Paren(vec![Block::Text("x".into())])` | +| `{}` | `Block::Curly(...)` | `Vec<Block>` | `Block::Curly(vec![Block::Text("x".into())])` | +| `[]` | `Block::Square(...)` | `Vec<Block>` | `Block::Square(vec![Block::Text("x".into())])` | +| `''` | `Block::SingleQuote(...)` | `String` | `Block::SingleQuote("hello".into())` | +| `""` | `Block::DoubleQuote(...)` | `String` | `Block::DoubleQuote("hello".into())` | +| `` `` `` | `Block::Backtick(...)` | `String` | `Block::Backtick("hello".into())` | +| plain text | `Block::Text(...)` | `String` | `Block::Text("hello".into())` | + +### Serde JSON Serialization + 
+The `Block` enum uses serde's `#[serde(tag = "type", 
content = "content", rename_all = "camelCase")]` attribute, producing JSON identical to the JavaScript implementation: + +```rust +let result = parse("hello (world)"); +let json = serde_json::to_string_pretty(&result).unwrap(); +// Produces: +// [ +// { "type": "text", "content": "hello " }, +// { "type": "paren", "content": [{ "type": "text", "content": "world" }] } +// ] +``` + +This ensures interoperability between the JavaScript and Rust implementations. + ## Building ```bash @@ -63,13 +165,22 @@ cargo build --release cargo test ``` +92 test cases covering: +- **Parser tests** (15 tests): Each delimiter type, mixed delimiters, nested structures, empty delimiters, quotes with special characters, JavaScript/Python/JSON-like code structures, with exact expected parsed object verification +- **Serializer tests** (10 tests): Serialization of each delimiter type, round-trip consistency +- **Programming language tests** (26 tests): 25+ languages with delimiter type and round-trip verification, key tests include full expected parsed object structure +- **Natural language tests** (30 tests): English, Spanish, French, German, Italian, Portuguese, Russian, Japanese, Chinese text, academic citations, mathematical expressions, legal text, and more +- **Unit tests** (9 tests): Internal parser tests in lib.rs +- **Doc tests** (2 tests): Examples in documentation + ## Features - **Universal Delimiter Parsing**: Parses `()`, `{}`, `[]`, `''`, `""`, `` ` ` `` -- **Language Agnostic**: Works with programming and natural languages +- **Language Agnostic**: Works with 25+ programming languages and all natural languages - **Nested Structures**: Supports arbitrary nesting of delimiters - **Round-trip Serialization**: Parse and serialize back to original text -- **Serde Support**: Serialize/deserialize to JSON -- **Zero Dependencies**: Only uses `serde` for serialization +- **Serde Support**: Serialize/deserialize to JSON with structure identical to JavaScript implementation +- 
**Display Trait**: Blocks implement `Display` for convenient string formatting +- **Identical Output**: Produces the same parsed structure as the JavaScript implementation See the [main README](../README.md) for more information. diff --git a/rust/tests/languages_test.rs b/rust/tests/languages_test.rs index 57d9d33..bb46199 100644 --- a/rust/tests/languages_test.rs +++ b/rust/tests/languages_test.rs @@ -1,9 +1,14 @@ //! Tests for meta-notation with various programming languages +//! +//! Each test verifies: +//! 1. The expected delimiter types are found in the parsed result +//! 2. Round-trip serialization preserves the original text +//! 3. Key tests include full expected parsed object structure verification -use meta_notation::{parse, serialize, DelimiterType}; +use meta_notation::{parse, serialize, Block, DelimiterType}; // Helper function to check if a delimiter type exists anywhere in the parsed result -fn has_delimiter_type(blocks: &[meta_notation::Block], dtype: &DelimiterType) -> bool { +fn has_delimiter_type(blocks: &[Block], dtype: &DelimiterType) -> bool { blocks.iter().any(|b| b.has_delimiter_type(dtype)) } @@ -11,7 +16,23 @@ fn has_delimiter_type(blocks: &[meta_notation::Block], dtype: &DelimiterType) -> fn test_parse_javascript_code() { let code = "const greet = (name) => { return `Hello, ${name}!`; };"; let result = parse(code); - assert!(result.len() > 0); + + // Verify exact parsed structure + assert_eq!( + result, + vec![ + Block::Text("const greet = ".to_string()), + Block::Paren(vec![Block::Text("name".to_string())]), + Block::Text(" => ".to_string()), + Block::Curly(vec![ + Block::Text(" return ".to_string()), + Block::Backtick("Hello, ${name}!".to_string()), + Block::Text("; ".to_string()), + ]), + Block::Text(";".to_string()), + ] + ); + assert!(has_delimiter_type(&result, &DelimiterType::Paren)); assert!(has_delimiter_type(&result, &DelimiterType::Curly)); assert!(has_delimiter_type(&result, &DelimiterType::Backtick)); @@ -22,6 +43,24 @@ fn 
test_parse_javascript_code() { fn test_parse_python_code() { let code = r#"def calculate(x, y): return {"sum": x + y, "list": [x, y]}"#; let result = parse(code); + + // Verify exact parsed structure + assert_eq!( + result, + vec![ + Block::Text("def calculate".to_string()), + Block::Paren(vec![Block::Text("x, y".to_string())]), + Block::Text(": return ".to_string()), + Block::Curly(vec![ + Block::DoubleQuote("sum".to_string()), + Block::Text(": x + y, ".to_string()), + Block::DoubleQuote("list".to_string()), + Block::Text(": ".to_string()), + Block::Square(vec![Block::Text("x, y".to_string())]), + ]), + ] + ); + assert!(has_delimiter_type(&result, &DelimiterType::Paren)); assert!(has_delimiter_type(&result, &DelimiterType::Curly)); assert!(has_delimiter_type(&result, &DelimiterType::Square)); @@ -33,6 +72,22 @@ fn test_parse_python_code() { fn test_parse_go_code() { let code = r#"func main() { fmt.Println("Hello, World!") }"#; let result = parse(code); + + // Verify exact parsed structure + assert_eq!( + result, + vec![ + Block::Text("func main".to_string()), + Block::Paren(vec![]), + Block::Text(" ".to_string()), + Block::Curly(vec![ + Block::Text(" fmt.Println".to_string()), + Block::Paren(vec![Block::DoubleQuote("Hello, World!".to_string())]), + Block::Text(" ".to_string()), + ]), + ] + ); + assert!(has_delimiter_type(&result, &DelimiterType::Paren)); assert!(has_delimiter_type(&result, &DelimiterType::Curly)); assert!(has_delimiter_type(&result, &DelimiterType::DoubleQuote)); @@ -224,6 +279,27 @@ fn test_parse_sql_code() { fn test_parse_json() { let code = r#"{"name": "John", "age": 30, "tags": ["developer", "designer"]}"#; let result = parse(code); + + // Verify exact parsed structure + assert_eq!( + result, + vec![Block::Curly(vec![ + Block::DoubleQuote("name".to_string()), + Block::Text(": ".to_string()), + Block::DoubleQuote("John".to_string()), + Block::Text(", ".to_string()), + Block::DoubleQuote("age".to_string()), + Block::Text(": 30, ".to_string()), + 
Block::DoubleQuote("tags".to_string()), + Block::Text(": ".to_string()), + Block::Square(vec![ + Block::DoubleQuote("developer".to_string()), + Block::Text(", ".to_string()), + Block::DoubleQuote("designer".to_string()), + ]), + ])] + ); + assert!(has_delimiter_type(&result, &DelimiterType::Curly)); assert!(has_delimiter_type(&result, &DelimiterType::Square)); assert!(has_delimiter_type(&result, &DelimiterType::DoubleQuote)); diff --git a/rust/tests/natural_languages_test.rs b/rust/tests/natural_languages_test.rs index cab3f60..d26c093 100644 --- a/rust/tests/natural_languages_test.rs +++ b/rust/tests/natural_languages_test.rs @@ -15,6 +15,17 @@ fn has_delimiter_type(blocks: &[Block], dtype: &DelimiterType) -> bool { fn test_parse_english_text_with_quotes() { let text = r#"She said, "Hello, world!" and smiled."#; let result = parse(text); + + // Verify exact parsed structure + assert_eq!( + result, + vec![ + Block::Text("She said, ".to_string()), + Block::DoubleQuote("Hello, world!".to_string()), + Block::Text(" and smiled.".to_string()), + ] + ); + assert!(has_delimiter_type(&result, &DelimiterType::DoubleQuote)); assert_eq!(serialize(&result), text); } @@ -23,6 +34,17 @@ fn test_parse_english_text_with_quotes() { fn test_parse_english_text_with_parentheses() { let text = "The conference (scheduled for next week) will be online."; let result = parse(text); + + // Verify exact parsed structure + assert_eq!( + result, + vec![ + Block::Text("The conference ".to_string()), + Block::Paren(vec![Block::Text("scheduled for next week".to_string())]), + Block::Text(" will be online.".to_string()), + ] + ); + assert!(has_delimiter_type(&result, &DelimiterType::Paren)); assert_eq!(serialize(&result), text); } @@ -31,6 +53,17 @@ fn test_parse_english_text_with_parentheses() { fn test_parse_english_text_with_brackets() { let text = "According to the report [see page 42], the results were positive."; let result = parse(text); + + // Verify exact parsed structure + assert_eq!( + 
result, + vec![ + Block::Text("According to the report ".to_string()), + Block::Square(vec![Block::Text("see page 42".to_string())]), + Block::Text(", the results were positive.".to_string()), + ] + ); + assert!(has_delimiter_type(&result, &DelimiterType::Square)); assert_eq!(serialize(&result), text); } @@ -147,6 +180,19 @@ fn test_parse_chinese_text_pinyin_with_quotes() { fn test_parse_academic_text_with_citations() { let text = "The study [Smith et al., 2020] found that performance (measured in ms) improved."; let result = parse(text); + + // Verify exact parsed structure + assert_eq!( + result, + vec![ + Block::Text("The study ".to_string()), + Block::Square(vec![Block::Text("Smith et al., 2020".to_string())]), + Block::Text(" found that performance ".to_string()), + Block::Paren(vec![Block::Text("measured in ms".to_string())]), + Block::Text(" improved.".to_string()), + ] + ); + assert!(has_delimiter_type(&result, &DelimiterType::Square)); assert!(has_delimiter_type(&result, &DelimiterType::Paren)); assert_eq!(serialize(&result), text); @@ -156,6 +202,21 @@ fn test_parse_academic_text_with_citations() { fn test_parse_mathematical_text() { let text = "The formula is f(x) = [a + b] * {c - d}."; let result = parse(text); + + // Verify exact parsed structure + assert_eq!( + result, + vec![ + Block::Text("The formula is f".to_string()), + Block::Paren(vec![Block::Text("x".to_string())]), + Block::Text(" = ".to_string()), + Block::Square(vec![Block::Text("a + b".to_string())]), + Block::Text(" * ".to_string()), + Block::Curly(vec![Block::Text("c - d".to_string())]), + Block::Text(".".to_string()), + ] + ); + assert!(has_delimiter_type(&result, &DelimiterType::Paren)); assert!(has_delimiter_type(&result, &DelimiterType::Square)); assert!(has_delimiter_type(&result, &DelimiterType::Curly)); diff --git a/rust/tests/parser_test.rs b/rust/tests/parser_test.rs new file mode 100644 index 0000000..43e40e3 --- /dev/null +++ b/rust/tests/parser_test.rs @@ -0,0 +1,140 @@ +//! 
Tests for the meta-notation parser +//! +//! These tests verify the exact parsed object structure matches the expected +//! output, ensuring consistency with the JavaScript implementation. + +use meta_notation::{parse, Block}; + +#[test] +fn test_parse_plain_text() { + let result = parse("hello world"); + assert_eq!(result, vec![Block::Text("hello world".to_string())]); +} + +#[test] +fn test_parse_parentheses() { + let result = parse("(hello)"); + assert_eq!(result, vec![Block::Paren(vec![Block::Text("hello".to_string())])]); +} + +#[test] +fn test_parse_curly_braces() { + let result = parse("{world}"); + assert_eq!(result, vec![Block::Curly(vec![Block::Text("world".to_string())])]); +} + +#[test] +fn test_parse_square_brackets() { + let result = parse("[test]"); + assert_eq!(result, vec![Block::Square(vec![Block::Text("test".to_string())])]); +} + +#[test] +fn test_parse_single_quotes() { + let result = parse("'hello'"); + assert_eq!(result, vec![Block::SingleQuote("hello".to_string())]); +} + +#[test] +fn test_parse_double_quotes() { + let result = parse("\"world\""); + assert_eq!(result, vec![Block::DoubleQuote("world".to_string())]); +} + +#[test] +fn test_parse_backticks() { + let result = parse("`code`"); + assert_eq!(result, vec![Block::Backtick("code".to_string())]); +} + +#[test] +fn test_parse_mixed_delimiters() { + let result = parse("hello (world) {test}"); + assert_eq!( + result, + vec![ + Block::Text("hello ".to_string()), + Block::Paren(vec![Block::Text("world".to_string())]), + Block::Text(" ".to_string()), + Block::Curly(vec![Block::Text("test".to_string())]), + ] + ); +} + +#[test] +fn test_parse_nested_structures() { + let result = parse("(a (b) c)"); + assert_eq!( + result, + vec![Block::Paren(vec![ + Block::Text("a ".to_string()), + Block::Paren(vec![Block::Text("b".to_string())]), + Block::Text(" c".to_string()), + ])] + ); +} + +#[test] +fn test_parse_complex_nested_structures() { + let result = parse("{a [b (c) d] e}"); + assert_eq!( + 
result, + vec![Block::Curly(vec![ + Block::Text("a ".to_string()), + Block::Square(vec![ + Block::Text("b ".to_string()), + Block::Paren(vec![Block::Text("c".to_string())]), + Block::Text(" d".to_string()), + ]), + Block::Text(" e".to_string()), + ])] + ); +} + +#[test] +fn test_parse_empty_delimiters() { + let result = parse("(){}[]"); + assert_eq!( + result, + vec![ + Block::Paren(vec![]), + Block::Curly(vec![]), + Block::Square(vec![]), + ] + ); +} + +#[test] +fn test_parse_quotes_with_special_chars() { + let result = parse("\"hello {world}\""); + assert_eq!(result, vec![Block::DoubleQuote("hello {world}".to_string())]); +} + +#[test] +fn test_parse_javascript_like_code() { + let result = parse("function test() { return \"hello\"; }"); + assert_eq!(result.len(), 4); + assert!(matches!(result[0], Block::Text(_))); + assert!(matches!(result[1], Block::Paren(_))); + assert!(matches!(result[3], Block::Curly(_))); +} + +#[test] +fn test_parse_python_like_code() { + let result = parse("def test(): return [1, 2, 3]"); + let has_paren = result.iter().any(|b| matches!(b, Block::Paren(_))); + let has_square = result.iter().any(|b| matches!(b, Block::Square(_))); + let has_text = result.iter().any(|b| matches!(b, Block::Text(_))); + assert!(has_paren); + assert!(has_square); + assert!(has_text); +} + +#[test] +fn test_parse_json_like_structure() { + let result = parse("{\"key\": \"value\", \"array\": [1, 2, 3]}"); + assert!(matches!(result[0], Block::Curly(_))); + if let Block::Curly(ref content) = result[0] { + assert!(!content.is_empty()); + } +} diff --git a/rust/tests/serializer_test.rs b/rust/tests/serializer_test.rs new file mode 100644 index 0000000..4cf4467 --- /dev/null +++ b/rust/tests/serializer_test.rs @@ -0,0 +1,84 @@ +//! Tests for the meta-notation serializer +//! +//! These tests verify that serialization produces the expected output and +//! that round-trip (parse -> serialize) preserves the original text. 
+ +use meta_notation::{parse, serialize, Block}; + +#[test] +fn test_serialize_plain_text() { + let blocks = vec![Block::Text("hello world".to_string())]; + let result = serialize(&blocks); + assert_eq!(result, "hello world"); +} + +#[test] +fn test_serialize_parentheses() { + let blocks = vec![Block::Paren(vec![Block::Text("hello".to_string())])]; + let result = serialize(&blocks); + assert_eq!(result, "(hello)"); +} + +#[test] +fn test_serialize_curly_braces() { + let blocks = vec![Block::Curly(vec![Block::Text("world".to_string())])]; + let result = serialize(&blocks); + assert_eq!(result, "{world}"); +} + +#[test] +fn test_serialize_square_brackets() { + let blocks = vec![Block::Square(vec![Block::Text("test".to_string())])]; + let result = serialize(&blocks); + assert_eq!(result, "[test]"); +} + +#[test] +fn test_serialize_quotes() { + let blocks = vec![ + Block::SingleQuote("hello".to_string()), + Block::Text(" ".to_string()), + Block::DoubleQuote("world".to_string()), + ]; + let result = serialize(&blocks); + assert_eq!(result, "'hello' \"world\""); +} + +#[test] +fn test_serialize_backticks() { + let blocks = vec![Block::Backtick("code".to_string())]; + let result = serialize(&blocks); + assert_eq!(result, "`code`"); +} + +#[test] +fn test_round_trip_parse_then_serialize() { + let original = "hello (world) {test} [array] \"string\" `code`"; + let parsed = parse(original); + let serialized = serialize(&parsed); + assert_eq!(serialized, original); +} + +#[test] +fn test_round_trip_nested_structures() { + let original = "{a [b (c) d] e}"; + let parsed = parse(original); + let serialized = serialize(&parsed); + assert_eq!(serialized, original); +} + +#[test] +fn test_round_trip_empty_delimiters() { + let original = "(){}[]"; + let parsed = parse(original); + let serialized = serialize(&parsed); + assert_eq!(serialized, original); +} + +#[test] +fn test_round_trip_complex_code() { + let original = "function test() { return \"hello\"; }"; + let parsed = 
parse(original); + let serialized = serialize(&parsed); + assert_eq!(serialized, original); +}