From 9719cd44267e2365cc9bcda9842bb63e0b22c280 Mon Sep 17 00:00:00 2001
From: nub31
Date: Sun, 2 Feb 2025 20:12:20 +0100
Subject: [PATCH] working kinda

---
Notes (text in this position is ignored by git am / git apply):

gc.asm differs from the first draft of this patch in two places:
  * gc_mark read the mark byte and the object size relative to the payload
    pointer instead of the header, loaded child values through the
    uninitialised rbx register, and lost its scan cursor (rdx/rcx) across
    the recursive call. It now works from the header pointer found in the
    allocation list and preserves rdx/rcx around the recursion.
  * sys_mmap / sys_munmap compared the raw syscall result with -1. The
    kernel returns -errno (-4095..-1) on failure, so the checks now cover
    that whole range.
The gc.asm hunk header and the diffstat reflect the resulting 213 lines; the
blob id on its "index" line still belongs to the first draft.

Leading whitespace was normalised in this copy, and the generic argument of
OfType() in the first Generator.cs hunk is missing; use
"git apply --ignore-whitespace" and check that context line if a hunk is
rejected.

 Nub.Lang/Nub.Lang/Backend/Custom/Generator.cs |  45 +---
 .../Nub.Lang/Backend/Custom/SymbolTable.cs    |   8 +-
 input/baseline/gc.asm                         | 213 ++++++++++++++++++++++++++++++++
 input/baseline/str_cmp.asm                    |  20 ++
 input/core/arr_size.asm                       |   2 +-
 input/core/str_len.asm                        |   2 +-
 output/build.sh                               |  12 +-
 7 files changed, 255 insertions(+), 47 deletions(-)
 create mode 100644 input/baseline/gc.asm
 create mode 100644 input/baseline/str_cmp.asm

diff --git a/Nub.Lang/Nub.Lang/Backend/Custom/Generator.cs b/Nub.Lang/Nub.Lang/Backend/Custom/Generator.cs
index 052b7f4..91fcb2a 100644
--- a/Nub.Lang/Nub.Lang/Backend/Custom/Generator.cs
+++ b/Nub.Lang/Nub.Lang/Backend/Custom/Generator.cs
@@ -42,6 +42,8 @@ public class Generator
     {
         _builder.AppendLine("global _start");
 
+        _builder.AppendLine("extern gc_alloc");
+        _builder.AppendLine("extern str_cmp");
         foreach (var externFuncDefinition in _definitions.OfType())
         {
             _builder.AppendLine($"extern {externFuncDefinition.Name}");
@@ -70,41 +72,6 @@ public class Generator
 
         _builder.AppendLine("""
 
-            eb6e_alloc:
-                mov rax, 9
-                mov rsi, rdi
-                mov rdi, 0
-                mov rdx, 3
-                mov r10, 34
-                mov r8, -1
-                mov r9, 0
-                syscall
-                cmp rax, -1
-                je .error
-                ret
-            .error:
-                mov rax, 60
-                mov rdi, 1
-                syscall
-
-            eb6e_str_cmp:
-                xor rdx, rdx
-            .loop:
-                mov al, [rsi + rdx]
-                mov bl, [rdi + rdx]
-                inc rdx
-                cmp al, bl
-                jne .not_equal
-                cmp al, 0
-                je .equal
-                jmp .loop
-            .not_equal:
-                mov rax, 0
-                ret
-            .equal:
-                mov rax, 1
-                ret
-
             eb6e_oob_error:
                 mov rax, 60
                 mov rdi, 139
@@ -429,8 +396,8 @@ public class Generator
     private void GenerateArrayInitializer(ArrayInitializerNode arrayInitializer)
     {
         _builder.AppendLine($" mov rdi, {8 + arrayInitializer.Length * 8}");
-        _builder.AppendLine(" call eb6e_alloc");
-        _builder.AppendLine($" mov QWORD [rax], {arrayInitializer.Length}");
+        _builder.AppendLine(" call gc_alloc");
+        _builder.AppendLine($" mov qword [rax], {arrayInitializer.Length}");
     }
 
     private void GenerateBinaryExpression(BinaryExpressionNode binaryExpression, LocalFunc func)
@@ -506,7 +473,7 @@ public class Generator
             case StringType:
                 _builder.AppendLine(" mov rdi, rax");
                 _builder.AppendLine(" mov rsi, rcx");
-                _builder.AppendLine(" call eb6e_str_cmp");
+                _builder.AppendLine(" call str_cmp");
                 break;
             default:
                 throw new ArgumentOutOfRangeException(nameof(type));
@@ -660,7 +627,7 @@ public class Generator
         }
 
         _builder.AppendLine($" mov rdi, {structDefinition.Members.Count * 8}");
-        _builder.AppendLine(" call eb6e_alloc");
+        _builder.AppendLine(" call gc_alloc");
         _builder.AppendLine(" mov rcx, rax");
 
         foreach (var initializer in structInitializer.Initializers)
diff --git a/Nub.Lang/Nub.Lang/Backend/Custom/SymbolTable.cs b/Nub.Lang/Nub.Lang/Backend/Custom/SymbolTable.cs
index 73f8750..3573413 100644
--- a/Nub.Lang/Nub.Lang/Backend/Custom/SymbolTable.cs
+++ b/Nub.Lang/Nub.Lang/Backend/Custom/SymbolTable.cs
@@ -91,20 +91,20 @@ public class SymbolTable
             {
                 case IfNode ifStatement:
                 {
-                    offset += ResolveBlockVariables(ifStatement.Body, variables, offset);
+                    offset = ResolveBlockVariables(ifStatement.Body, variables, offset);
                     if (ifStatement.Else.HasValue)
                     {
                         ifStatement.Else.Value.Match
                         (
-                            elseIfStatement => offset += ResolveBlockVariables(elseIfStatement.Body, variables, offset),
-                            elseStatement => offset += ResolveBlockVariables(elseStatement, variables, offset)
+                            elseIfStatement => offset = ResolveBlockVariables(elseIfStatement.Body, variables, offset),
+                            elseStatement => offset = ResolveBlockVariables(elseStatement, variables, offset)
                         );
                     }
                     break;
                 }
                 case WhileNode whileStatement:
                 {
-                    offset += ResolveBlockVariables(whileStatement.Body, variables, offset);
+                    offset = ResolveBlockVariables(whileStatement.Body, variables, offset);
                     break;
                 }
                 case VariableAssignmentNode variableAssignment:
diff --git a/input/baseline/gc.asm b/input/baseline/gc.asm
new file mode 100644
index 0000000..8b62d7b
--- /dev/null
+++ b/input/baseline/gc.asm
@@ -0,0 +1,213 @@
+global gc_init, gc_alloc, gc_free, gc_collect
+extern itoa
+extern str_len
+
+section .bss
+    alloc_list: resq 1
+    stack_start: resq 1
+
+; TMP
+
+section .data
+    newline: db 10, 0
+    start_mark: db "Starting to mark", 0
+    marked: db "Marked object", 0
+
+; /TMP
+
+section .text
+
+; TMP
+
+print_int:
+    push rbp
+    mov rbp, rsp
+    sub rsp, 8
+    mov [rbp - 8], rdi
+    mov rax, [rbp - 8]
+    push rax
+    pop rdi
+    call itoa
+    push rax
+    pop rdi
+    call print
+    mov rdi, newline
+    call print
+    mov rsp, rbp
+    pop rbp
+    ret
+
+print:
+    push rbp
+    mov rbp, rsp
+    sub rsp, 8
+    mov [rbp - 8], rdi
+    mov rax, 1
+    push rax
+    mov rax, 1
+    push rax
+    mov rax, [rbp - 8]
+    push rax
+    mov rax, [rbp - 8]
+    push rax
+    pop rdi
+    call str_len
+    push rax
+    pop rdx
+    pop rsi
+    pop rdi
+    pop rax
+    syscall
+    mov rsp, rbp
+    pop rbp
+    ret
+
+; /TMP
+
+gc_init:
+    mov [stack_start], rsp
+    ret
+
+gc_alloc:
+    add rdi, 17                 ; add space for metadata
+    push rdi
+    call sys_mmap               ; allocate size + metadata
+    pop rdi
+    mov byte [rax], 0           ; set mark to 0
+    mov qword [rax + 1], rdi    ; set total size of object (including metadata)
+    mov rsi, [alloc_list]       ; load first item in allocation list
+    mov qword [rax + 9], rsi    ; make current head of allocation list the next item in this object
+    mov [alloc_list], rax       ; update head of allocation list so it points to this object
+    add rax, 17                 ; skip metadata for return value
+    ret
+
+; Generated by chatgpt. Rewrite this later
+; TODO: refactor to unlink easier
+gc_free:
+    mov rsi, [alloc_list]       ; Load head of allocation list
+    test rsi, rsi               ; Check if list is empty
+    jz .not_found               ; If empty, nothing to free
+    cmp rsi, rdi                ; Is the first item the one to free?
+    je .remove_head             ; If so, update head directly
+.loop:
+    mov rdx, [rsi + 9]          ; Load next item in list
+    test rdx, rdx               ; Check if end of list
+    jz .not_found               ; If not found, return
+    cmp rdx, rdi                ; Is this the item to remove?
+    je .remove_item             ; If so, unlink it
+    mov rsi, rdx                ; Move to next item
+    jmp .loop                   ; Repeat
+.remove_head:
+    mov rdx, [rdi + 9]          ; Get next item
+    mov [alloc_list], rdx       ; Update head of list
+    jmp .free_memory            ; Free the object
+.remove_item:
+    mov rdx, [rdi + 9]          ; Get next item
+    mov [rsi + 9], rdx          ; Bypass rdi in the list
+.free_memory:
+    mov rsi, [rdi + 1]          ; Get object size
+    call sys_munmap             ; Free memory
+    ret
+.not_found:
+    ret                         ; Item not found, do nothing
+
+gc_collect:
+    call gc_mark_stack
+    call gc_sweep
+    ret
+
+gc_mark_stack:
+    mov r8, rsp                 ; load current stack pointer
+    mov r9, [stack_start]       ; load start of stack
+.loop:
+    cmp r8, r9                  ; have we reached end of stack?
+    ja .done                    ; yes? return
+    mov rdi, [r8]               ; no? load the value
+    call gc_mark                ; this might be an allocation, check
+    add r8, 8                   ; next item in stack
+    jmp .loop
+.done:
+    ret
+
+gc_mark:
+    test rdi, rdi               ; is input null?
+    jz .done                    ; yes? return
+    mov rsi, [alloc_list]       ; load start of allocation list
+.loop:
+    test rsi, rsi               ; reached end of list?
+    jz .done                    ; yes? return
+    lea rdx, [rsi + 17]         ; payload address of this allocation
+    cmp rdx, rdi                ; no? is this the input object?
+    je .mark_object             ; yes? mark it
+    mov rsi, [rsi + 9]          ; no? next item
+    jmp .loop
+.mark_object:
+    mov al, [rsi]               ; load mark from the header (rsi = header, rdi = payload)
+    test al, al                 ; already marked?
+    jnz .done                   ; yes? return (also stops cycles)
+    mov byte [rsi], 1           ; mark object
+    mov rcx, [rsi + 1]          ; load total object size (including metadata)
+    add rcx, rsi                ; end of object
+    mov rdx, rdi                ; start of data
+.scan_object:
+    lea rax, [rdx + 8]          ; end of the next 8-byte slot
+    cmp rax, rcx                ; does a whole slot still fit in the object?
+    ja .done                    ; no? return
+    mov rdi, [rdx]              ; load value
+    push rdx                    ; the recursive call clobbers rdx and rcx
+    push rcx
+    call gc_mark                ; this might be an allocation, check
+    pop rcx
+    pop rdx
+    add rdx, 8                  ; next slot
+    jmp .scan_object
+.done:
+    ret
+
+gc_sweep:
+    mov rdi, [alloc_list]
+.loop:
+    test rdi, rdi               ; reached end of list?
+    jz .done                    ; yes? return
+    mov al, [rdi]
+    test al, al                 ; is object marked?
+    jz .free                    ; no? free it
+    mov byte [rdi], 0           ; yes? clear mark for next scan
+    mov rdi, [rdi + 9]
+    jmp .loop
+.free:
+    mov rcx, [rdi + 9]
+    push rcx
+    call gc_free
+    pop rdi
+    jmp .loop
+.done:
+    ret
+
+sys_mmap:
+    mov rax, 9
+    mov rsi, rdi
+    mov rdi, 0
+    mov rdx, 3
+    mov r10, 34
+    mov r8, -1
+    mov r9, 0
+    syscall
+    cmp rax, -4095              ; raw syscalls return -errno (-4095..-1) on failure
+    jae .error                  ; unsigned compare: anything in that range is an error
+    ret
+.error:
+    mov rax, 60
+    mov rdi, 1
+    syscall
+
+sys_munmap:
+    mov rax, 11
+    syscall
+    cmp rax, -4095              ; raw syscalls return -errno (-4095..-1) on failure
+    jae .error                  ; unsigned compare: anything in that range is an error
+    ret
+.error:
+    mov rax, 60
+    mov rdi, 1
+    syscall
\ No newline at end of file
diff --git a/input/baseline/str_cmp.asm b/input/baseline/str_cmp.asm
new file mode 100644
index 0000000..014efec
--- /dev/null
+++ b/input/baseline/str_cmp.asm
@@ -0,0 +1,20 @@
+global str_cmp
+
+section .text
+str_cmp:
+    xor rdx, rdx
+.loop:
+    mov al, [rsi + rdx]
+    mov bl, [rdi + rdx]
+    inc rdx
+    cmp al, bl
+    jne .not_equal
+    cmp al, 0
+    je .equal
+    jmp .loop
+.not_equal:
+    mov rax, 0
+    ret
+.equal:
+    mov rax, 1
+    ret
\ No newline at end of file
diff --git a/input/core/arr_size.asm b/input/core/arr_size.asm
index b99ac73..5eb2c73 100644
--- a/input/core/arr_size.asm
+++ b/input/core/arr_size.asm
@@ -1,6 +1,6 @@
 global arr_size
 
-section .text
+section .text
 arr_size:
     mov rax, [rdi]
     ret
\ No newline at end of file
diff --git a/input/core/str_len.asm b/input/core/str_len.asm
index e296ab0..cbe742f 100644
--- a/input/core/str_len.asm
+++ b/input/core/str_len.asm
@@ -1,6 +1,6 @@
 global str_len
 
-section .text
+section .text
 str_len:
     xor rax, rax
 .loop:
diff --git a/output/build.sh b/output/build.sh
index bd1156e..74ea336 100755
--- a/output/build.sh
+++ b/output/build.sh
@@ -1,7 +1,15 @@
 #!/bin/sh
-nasm -g -felf64 out.asm -o out.o
+
+# baseline
+nasm -g -felf64 ../input/baseline/gc.asm -o gc.o
+nasm -g -felf64 ../input/baseline/str_cmp.asm -o str_cmp.o
+
+# core
 nasm -g -felf64 ../input/core/str_len.asm -o str_len.o
 nasm -g -felf64 ../input/core/arr_size.asm -o arr_size.o
 nasm -g -felf64 ../input/core/itoa.asm -o itoa.o
 
-ld -o out str_len.o arr_size.o itoa.o out.o
+# program
+nasm -g -felf64 out.asm -o out.o
+
+ld -o out str_len.o arr_size.o itoa.o gc.o str_cmp.o out.o