tokenizer improvements
This commit is contained in:
@@ -18,9 +18,10 @@ public sealed class Tokenizer(string fileName, string contents)
|
||||
{
|
||||
var tokens = new List<Token>();
|
||||
diagnostics = [];
|
||||
try
|
||||
{
|
||||
|
||||
while (true)
|
||||
{
|
||||
try
|
||||
{
|
||||
if (!TryPeek(out var c))
|
||||
break;
|
||||
@@ -31,12 +32,27 @@ public sealed class Tokenizer(string fileName, string contents)
|
||||
continue;
|
||||
}
|
||||
|
||||
tokens.Add(ParseToken());
|
||||
if (c == '/' && Peek(1) == '/')
|
||||
{
|
||||
Consume();
|
||||
Consume();
|
||||
while (TryPeek(out c) && c != '\n')
|
||||
{
|
||||
Consume();
|
||||
}
|
||||
|
||||
Consume();
|
||||
continue;
|
||||
}
|
||||
|
||||
tokens.Add(ParseToken());
|
||||
}
|
||||
catch (CompileException e)
|
||||
{
|
||||
diagnostics.Add(e.Diagnostic);
|
||||
// Skip current token if parsing failed, this prevents an infinite loop when ParseToken fails before consuming any tokens
|
||||
TryConsume(out _);
|
||||
}
|
||||
}
|
||||
|
||||
return tokens;
|
||||
@@ -57,6 +73,7 @@ public sealed class Tokenizer(string fileName, string contents)
|
||||
Consume();
|
||||
|
||||
var parsed = BigInteger.Zero;
|
||||
var seenDigit = false;
|
||||
|
||||
while (TryPeek(out c))
|
||||
{
|
||||
@@ -69,6 +86,7 @@ public sealed class Tokenizer(string fileName, string contents)
|
||||
if (!char.IsAsciiHexDigit(c))
|
||||
break;
|
||||
|
||||
seenDigit = true;
|
||||
parsed <<= 4;
|
||||
|
||||
Consume();
|
||||
@@ -81,6 +99,9 @@ public sealed class Tokenizer(string fileName, string contents)
|
||||
};
|
||||
}
|
||||
|
||||
if (!seenDigit)
|
||||
throw new CompileException(Diagnostic.Error("Expected hexadecimal digits after 0x").At(fileName, line, startColumn, column - startColumn).Build());
|
||||
|
||||
return new TokenIntLiteral(line, startColumn, column - startColumn, parsed);
|
||||
}
|
||||
case '0' when Peek(1) is 'b':
|
||||
@@ -89,6 +110,7 @@ public sealed class Tokenizer(string fileName, string contents)
|
||||
Consume();
|
||||
|
||||
var parsed = BigInteger.Zero;
|
||||
var seenDigit = false;
|
||||
|
||||
while (TryPeek(out c))
|
||||
{
|
||||
@@ -101,11 +123,16 @@ public sealed class Tokenizer(string fileName, string contents)
|
||||
if (c is not '0' and not '1')
|
||||
break;
|
||||
|
||||
seenDigit = true;
|
||||
parsed <<= 1;
|
||||
|
||||
if (Consume() == '1')
|
||||
parsed += BigInteger.One;
|
||||
}
|
||||
|
||||
if (!seenDigit)
|
||||
throw new CompileException(Diagnostic.Error("Expected binary digits after 0b").At(fileName, line, startColumn, column - startColumn).Build());
|
||||
|
||||
return new TokenIntLiteral(line, startColumn, column - startColumn, parsed);
|
||||
}
|
||||
default:
|
||||
@@ -137,16 +164,26 @@ public sealed class Tokenizer(string fileName, string contents)
|
||||
case '"':
|
||||
{
|
||||
Consume();
|
||||
|
||||
var buf = new StringBuilder();
|
||||
|
||||
while (TryPeek(out c) && c != '"')
|
||||
while (true)
|
||||
{
|
||||
if (!TryPeek(out c))
|
||||
throw new CompileException(Diagnostic.Error("Unterminated string literal").At(fileName, line, column, 0).Build());
|
||||
|
||||
if (c == '"')
|
||||
break;
|
||||
|
||||
if (c == '\n')
|
||||
throw new CompileException(Diagnostic.Error("Unterminated string literal").At(fileName, line, column, 1).Build());
|
||||
|
||||
buf.Append(Consume());
|
||||
}
|
||||
|
||||
Consume();
|
||||
|
||||
return new TokenStringLiteral(line, startColumn, column - startColumn, buf.ToString());
|
||||
}
|
||||
|
||||
case '{':
|
||||
{
|
||||
Consume();
|
||||
@@ -353,17 +390,20 @@ public sealed class Tokenizer(string fileName, string contents)
|
||||
};
|
||||
}
|
||||
|
||||
throw new Exception($"Unexpected character '{c}'");
|
||||
throw new CompileException(Diagnostic.Error($"Unexpected character '{c}'").At(fileName, line, column, 1).Build());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private char Consume()
|
||||
private bool TryConsume(out char c)
|
||||
{
|
||||
if (index >= contents.Length)
|
||||
throw new CompileException(Diagnostic.Error("Unexpected end of file").At(fileName, line, column, 0).Build());
|
||||
{
|
||||
c = '\0';
|
||||
return false;
|
||||
}
|
||||
|
||||
var c = contents[index];
|
||||
c = contents[index];
|
||||
|
||||
if (c == '\n')
|
||||
{
|
||||
@@ -377,6 +417,14 @@ public sealed class Tokenizer(string fileName, string contents)
|
||||
|
||||
index += 1;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/// <summary>
/// Consumes the next character of the input and returns it.
/// </summary>
/// <returns>The character that was consumed.</returns>
/// <exception cref="CompileException">
/// Thrown with an "Unexpected end of file" diagnostic when <c>TryConsume</c> reports
/// that no character could be consumed.
/// </exception>
private char Consume()
{
    // Delegate to the non-throwing variant; only the failure path differs.
    if (TryConsume(out var next))
        return next;

    throw new CompileException(Diagnostic.Error("Unexpected end of file").At(fileName, line, column, 0).Build());
}
|
||||
|
||||
@@ -498,14 +546,14 @@ public static class TokenExtensions
|
||||
Symbol.OpenParen => "(",
|
||||
Symbol.CloseParen => ")",
|
||||
Symbol.Comma => ",",
|
||||
Symbol.Period => ",",
|
||||
Symbol.Period => ".",
|
||||
Symbol.Colon => ":",
|
||||
Symbol.ColonColon => "::",
|
||||
Symbol.Caret => "^",
|
||||
Symbol.Bang => "!",
|
||||
Symbol.Equal => "=",
|
||||
Symbol.EqualEqual => "==",
|
||||
Symbol.BangEqual => "!+",
|
||||
Symbol.BangEqual => "!=",
|
||||
Symbol.LessThan => "<",
|
||||
Symbol.LessThanLessThan => "<<",
|
||||
Symbol.LessThanEqual => "<=",
|
||||
|
||||
@@ -14,7 +14,6 @@ func main(): i32 {
|
||||
let i: i32 = 0
|
||||
|
||||
x = 1 + 2 * 34
|
||||
|
||||
while i < 10 {
|
||||
i = i + 1
|
||||
x = i
|
||||
|
||||
Reference in New Issue
Block a user