diff --git a/src/runtime/runtime.asm b/src/runtime/runtime.asm
index c050e9b..77261c6 100644
--- a/src/runtime/runtime.asm
+++ b/src/runtime/runtime.asm
@@ -3,18 +3,13 @@ extern main
 
 section .text
 _start:
-    ; The args already match our array structure, so we pass the result directly
     mov rdi, rsp
-
-    call main ; main returns int in rax
-
-    ; Exit with main's return value
-    mov rdi, rax ; exit code
-    mov rax, 60 ; syscall: exit
+    call main
+    mov rdi, rax
+    mov rax, 60
     syscall
 
 global nub_strcmp
-
 nub_strcmp:
     xor rdx, rdx
 .loop:
@@ -32,4 +27,74 @@ nub_strcmp:
 .equal:
     mov rax, 1
     ret
-
\ No newline at end of file
+
+; TODO: This is AI-generated and should be re-implemented in the future
+global nub_memset
+nub_memset:
+    ; Save original destination for return value
+    mov rax, rdi
+
+    ; Handle zero length
+    test rdx, rdx
+    jz .done
+
+    ; For small sizes, use a simple byte-by-byte loop
+    cmp rdx, 16
+    jb .byte_loop
+
+    ; Prepare value for bulk setting
+    ; Replicate the byte across all 8 bytes of rsi
+    and rsi, 0xFF       ; Ensure only the low byte is used
+    mov rcx, rsi        ; rcx = byte value
+    shl rsi, 8
+    or rsi, rcx         ; rsi = byte | (byte << 8)
+    mov rcx, rsi
+    shl rsi, 16
+    or rsi, rcx         ; rsi = 4 copies of byte
+    mov rcx, rsi
+    shl rsi, 32
+    or rsi, rcx         ; rsi = 8 copies of byte
+
+    ; Align to 8-byte boundary if needed
+    mov rcx, rdi
+    and rcx, 7          ; rcx = misalignment (rdi mod 8), also sets ZF
+    jz .aligned
+
+    ; Fill bytes until aligned
+    neg rcx
+    add rcx, 8          ; rcx = 8 - misalignment = bytes to fill
+    cmp rcx, rdx
+    jbe .align_loop
+    mov rcx, rdx        ; Don't go past end
+.align_loop:
+    mov [rdi], sil
+    inc rdi
+    dec rdx
+    dec rcx
+    jnz .align_loop
+
+.aligned:
+    ; Fill 8 bytes at a time
+    mov rcx, rdx
+    shr rcx, 3          ; rcx = number of 8-byte chunks
+    jz .remainder
+.quad_loop:
+    mov [rdi], rsi
+    add rdi, 8
+    dec rcx
+    jnz .quad_loop
+
+    ; Handle remainder bytes
+    and rdx, 7          ; rdx = remaining bytes
+.remainder:
+    test rdx, rdx
+    jz .done
+
+.byte_loop:
+    mov [rdi], sil
+    inc rdi
+    dec rdx
+    jnz .byte_loop
+
+.done:
+    ret
\ No newline at end of file
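
Note on the TODO above: one possible re-implementation is built on rep stosb, which is much shorter and fast on CPUs with enhanced string operations (ERMSB). A minimal sketch, assuming nub_memset keeps the register convention visible in this patch (rdi = destination, rsi = fill byte, rdx = count, original destination returned in rax); the label nub_memset_simple is a hypothetical name used for illustration, not part of the patch:

global nub_memset_simple
nub_memset_simple:
    mov r8, rdi         ; save destination for the return value
    mov rcx, rdx        ; rcx = byte count consumed by rep
    mov rax, rsi        ; al = fill byte (rep stosb stores al)
    rep stosb           ; store al to [rdi] rcx times, advancing rdi
    mov rax, r8         ; return the original destination
    ret

The System V ABI guarantees the direction flag is clear on function entry, so no cld is needed. If the current shl/or replication chain is kept instead, the same 8-byte splat can be done with one multiply: zero-extend the byte (movzx rsi, sil), load 0x0101010101010101 into a scratch register, and imul by it.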