From 9ccdd5f835ce7a6006985809d00747c56c038ce7 Mon Sep 17 00:00:00 2001 From: nub31 Date: Mon, 9 Feb 2026 22:03:00 +0100 Subject: [PATCH] tokenizer improvements --- compiler/Tokenizer.cs | 78 ++++++++++++++++++++++++++++++++++--------- compiler/test.nub | 1 - 2 files changed, 63 insertions(+), 16 deletions(-) diff --git a/compiler/Tokenizer.cs b/compiler/Tokenizer.cs index 568f730..85589db 100644 --- a/compiler/Tokenizer.cs +++ b/compiler/Tokenizer.cs @@ -18,9 +18,10 @@ public sealed class Tokenizer(string fileName, string contents) { var tokens = new List(); diagnostics = []; - try + + while (true) { - while (true) + try { if (!TryPeek(out var c)) break; @@ -31,12 +32,27 @@ public sealed class Tokenizer(string fileName, string contents) continue; } + if (c == '/' && Peek(1) == '/') + { + Consume(); + Consume(); + while (TryPeek(out c) && c != '\n') + { + Consume(); + } + + Consume(); + continue; + } + tokens.Add(ParseToken()); } - } - catch (CompileException e) - { - diagnostics.Add(e.Diagnostic); + catch (CompileException e) + { + diagnostics.Add(e.Diagnostic); + // Skip the current character if parsing failed; this prevents an infinite loop when ParseToken fails before consuming any input + TryConsume(out _); + } } return tokens; @@ -57,6 +73,7 @@ public sealed class Tokenizer(string fileName, string contents) Consume(); var parsed = BigInteger.Zero; + var seenDigit = false; while (TryPeek(out c)) { @@ -69,6 +86,7 @@ public sealed class Tokenizer(string fileName, string contents) if (!char.IsAsciiHexDigit(c)) break; + seenDigit = true; parsed <<= 4; Consume(); @@ -81,6 +99,9 @@ public sealed class Tokenizer(string fileName, string contents) }; } + if (!seenDigit) + throw new CompileException(Diagnostic.Error("Expected hexadecimal digits after 0x").At(fileName, line, startColumn, column - startColumn).Build()); + return new TokenIntLiteral(line, startColumn, column - startColumn, parsed); } case '0' when Peek(1) is 'b': @@ -89,6 +110,7 @@ public sealed class
Tokenizer(string fileName, string contents) Consume(); var parsed = BigInteger.Zero; + var seenDigit = false; while (TryPeek(out c)) { @@ -101,11 +123,16 @@ public sealed class Tokenizer(string fileName, string contents) if (c is not '0' and not '1') break; + seenDigit = true; parsed <<= 1; + if (Consume() == '1') parsed += BigInteger.One; } + if (!seenDigit) + throw new CompileException(Diagnostic.Error("Expected binary digits after 0b").At(fileName, line, startColumn, column - startColumn).Build()); + return new TokenIntLiteral(line, startColumn, column - startColumn, parsed); } default: @@ -137,16 +164,26 @@ public sealed class Tokenizer(string fileName, string contents) case '"': { Consume(); - var buf = new StringBuilder(); - while (TryPeek(out c) && c != '"') + while (true) + { + if (!TryPeek(out c)) + throw new CompileException(Diagnostic.Error("Unterminated string literal").At(fileName, line, column, 0).Build()); + + if (c == '"') + break; + + if (c == '\n') + throw new CompileException(Diagnostic.Error("Unterminated string literal").At(fileName, line, column, 1).Build()); + buf.Append(Consume()); + } Consume(); - return new TokenStringLiteral(line, startColumn, column - startColumn, buf.ToString()); } + case '{': { Consume(); @@ -353,17 +390,20 @@ public sealed class Tokenizer(string fileName, string contents) }; } - throw new Exception($"Unexpected character '{c}'"); + throw new CompileException(Diagnostic.Error($"Unexpected character '{c}'").At(fileName, line, column, 1).Build()); } } } - private char Consume() + private bool TryConsume(out char c) { if (index >= contents.Length) - throw new CompileException(Diagnostic.Error("Unexpected end of file").At(fileName, line, column, 0).Build()); + { + c = '\0'; + return false; + } - var c = contents[index]; + c = contents[index]; if (c == '\n') { @@ -377,6 +417,14 @@ public sealed class Tokenizer(string fileName, string contents) index += 1; + return true; + } + + private char Consume() + { + if 
(!TryConsume(out var c)) + throw new CompileException(Diagnostic.Error("Unexpected end of file").At(fileName, line, column, 0).Build()); + return c; } @@ -498,14 +546,14 @@ public static class TokenExtensions Symbol.OpenParen => "(", Symbol.CloseParen => ")", Symbol.Comma => ",", - Symbol.Period => ",", + Symbol.Period => ".", Symbol.Colon => ":", Symbol.ColonColon => "::", Symbol.Caret => "^", Symbol.Bang => "!", Symbol.Equal => "=", Symbol.EqualEqual => "==", - Symbol.BangEqual => "!+", + Symbol.BangEqual => "!=", Symbol.LessThan => "<", Symbol.LessThanLessThan => "<<", Symbol.LessThanEqual => "<=", diff --git a/compiler/test.nub b/compiler/test.nub index fc2b13c..ab245b3 100644 --- a/compiler/test.nub +++ b/compiler/test.nub @@ -14,7 +14,6 @@ func main(): i32 { let i: i32 = 0 x = 1 + 2 * 34 - while i < 10 { i = i + 1 x = i