using System.Numerics;
using System.Text;

namespace Compiler;

/// <summary>
/// Splits source text into a flat list of <see cref="Token"/>s.
/// </summary>
/// <remarks>
/// Tokenization stops at the first <see cref="CompileException"/>; the tokens produced so far are
/// returned together with that exception's diagnostic.
/// </remarks>
public sealed class Tokenizer(string fileName, string contents)
{
    /// <summary>
    /// Tokenizes <paramref name="contents"/>.
    /// </summary>
    /// <param name="fileName">File name attached to reported diagnostics.</param>
    /// <param name="contents">The source text to tokenize.</param>
    /// <param name="diagnostics">Receives the diagnostics raised while tokenizing (empty on success).</param>
    /// <returns>The tokens recognised before the end of input or the first error.</returns>
    public static List<Token> Tokenize(string fileName, string contents, out List<Diagnostic> diagnostics)
    {
        return new Tokenizer(fileName, contents).Tokenize(out diagnostics);
    }

    // Offset of the next unread character in `contents`.
    private int index;

    // 1-based position of the next unread character.
    private int line = 1;
    private int column = 1;

    private List<Token> Tokenize(out List<Diagnostic> diagnostics)
    {
        var tokens = new List<Token>();
        diagnostics = [];

        try
        {
            while (TryPeek(out var c))
            {
                if (char.IsWhiteSpace(c))
                {
                    Consume();
                    continue;
                }

                tokens.Add(ParseToken());
            }
        }
        catch (CompileException e)
        {
            diagnostics.Add(e.Diagnostic);
        }

        return tokens;
    }

    /// <summary>
    /// Reads exactly one token starting at the current position.
    /// The caller guarantees that at least one non-whitespace character is available.
    /// </summary>
    /// <exception cref="CompileException">
    /// The input ends inside a string literal, or the next character cannot start any token.
    /// </exception>
    private Token ParseToken()
    {
        // Capture the start position up front: a token is reported where it begins, and its
        // length is measured in characters so that it stays correct across line breaks.
        var startLine = line;
        var startColumn = column;
        var startIndex = index;
        var c = Peek()!.Value;

        // ASCII digits only: other Unicode digits would produce wrong values via `c - '0'`.
        if (char.IsAsciiDigit(c))
        {
            return ParseIntLiteral(startLine, startColumn, startIndex);
        }

        if (c == '"')
        {
            Consume();
            var buf = new StringBuilder();
            while (TryPeek(out c) && c != '"')
            {
                buf.Append(Consume());
            }

            // Closing quote. When the literal is unterminated this raises "Unexpected end of file".
            Consume();
            return new TokenStringLiteral(startLine, startColumn, index - startIndex, buf.ToString());
        }

        // Two-character symbols are listed before their one-character prefix.
        var next = Peek(1);
        (Symbol Kind, int Width)? match = c switch
        {
            '{' => (Symbol.OpenCurly, 1),
            '}' => (Symbol.CloseCurly, 1),
            '(' => (Symbol.OpenParen, 1),
            ')' => (Symbol.CloseParen, 1),
            ',' => (Symbol.Comma, 1),
            '.' => (Symbol.Period, 1),
            ':' when next is ':' => (Symbol.ColonColon, 2),
            ':' => (Symbol.Colon, 1),
            '^' => (Symbol.Caret, 1),
            '!' when next is '=' => (Symbol.BangEqual, 2),
            '!' => (Symbol.Bang, 1),
            '=' when next is '=' => (Symbol.EqualEqual, 2),
            '=' => (Symbol.Equal, 1),
            '<' when next is '<' => (Symbol.LessThanLessThan, 2),
            '<' when next is '=' => (Symbol.LessThanEqual, 2),
            '<' => (Symbol.LessThan, 1),
            '>' when next is '>' => (Symbol.GreaterThanGreaterThan, 2),
            '>' when next is '=' => (Symbol.GreaterThanEqual, 2),
            '>' => (Symbol.GreaterThan, 1),
            '+' when next is '=' => (Symbol.PlusEqual, 2),
            '+' => (Symbol.Plus, 1),
            '-' when next is '=' => (Symbol.MinusEqual, 2),
            '-' => (Symbol.Minus, 1),
            '*' when next is '=' => (Symbol.StarEqual, 2),
            '*' => (Symbol.Star, 1),
            '/' when next is '=' => (Symbol.ForwardSlashEqual, 2),
            '/' => (Symbol.ForwardSlash, 1),
            '%' when next is '=' => (Symbol.PercentEqual, 2),
            '%' => (Symbol.Percent, 1),
            '&' when next is '&' => (Symbol.AmpersandAmpersand, 2),
            '&' => (Symbol.Ampersand, 1),
            '|' when next is '|' => (Symbol.PipePipe, 2),
            '|' => (Symbol.Pipe, 1),
            _ => null,
        };

        if (match is { } found)
        {
            for (var i = 0; i < found.Width; i++)
            {
                Consume();
            }

            return new TokenSymbol(startLine, startColumn, index - startIndex, found.Kind);
        }

        if (char.IsLetter(c) || c == '_')
        {
            while (TryPeek(out c) && (char.IsLetterOrDigit(c) || c == '_'))
            {
                Consume();
            }

            var text = contents[startIndex..index];
            var length = index - startIndex;
            return text switch
            {
                "func" => new TokenKeyword(startLine, startColumn, length, Keyword.Func),
                "struct" => new TokenKeyword(startLine, startColumn, length, Keyword.Struct),
                "let" => new TokenKeyword(startLine, startColumn, length, Keyword.Let),
                "if" => new TokenKeyword(startLine, startColumn, length, Keyword.If),
                "else" => new TokenKeyword(startLine, startColumn, length, Keyword.Else),
                "while" => new TokenKeyword(startLine, startColumn, length, Keyword.While),
                "return" => new TokenKeyword(startLine, startColumn, length, Keyword.Return),
                "module" => new TokenKeyword(startLine, startColumn, length, Keyword.Module),
                "true" => new TokenBoolLiteral(startLine, startColumn, length, true),
                "false" => new TokenBoolLiteral(startLine, startColumn, length, false),
                _ => new TokenIdent(startLine, startColumn, length, text),
            };
        }

        // Raised as a CompileException so Tokenize turns it into a diagnostic instead of crashing.
        // NOTE(review): the last argument of At(...) is presumed to be the span length - confirm.
        throw new CompileException(
            Diagnostic.Error($"Unexpected character '{c}'").At(fileName, line, column, 1).Build());
    }

    /// <summary>
    /// Reads an integer literal: decimal by default, hexadecimal after <c>0x</c>, binary after <c>0b</c>.
    /// Underscores between digits are skipped. The literal ends at the first character that is
    /// not a digit of the active radix.
    /// </summary>
    private TokenIntLiteral ParseIntLiteral(int startLine, int startColumn, int startIndex)
    {
        var radix = 10;
        if (Peek() is '0')
        {
            radix = Peek(1) switch
            {
                'x' => 16,
                'b' => 2,
                _ => 10,
            };

            if (radix != 10)
            {
                // Skip the two-character prefix.
                Consume();
                Consume();
            }
        }

        var value = BigInteger.Zero;
        while (TryPeek(out var c))
        {
            if (c == '_')
            {
                Consume();
                continue;
            }

            var digit = DigitValue(c);
            if (digit < 0 || digit >= radix)
            {
                break;
            }

            Consume();
            value = value * radix + digit;
        }

        return new TokenIntLiteral(startLine, startColumn, index - startIndex, value);
    }

    /// <summary>Returns the value of an ASCII hexadecimal digit, or -1 for any other character.</summary>
    private static int DigitValue(char c)
    {
        return c switch
        {
            >= '0' and <= '9' => c - '0',
            >= 'a' and <= 'f' => c - 'a' + 10,
            >= 'A' and <= 'F' => c - 'A' + 10,
            _ => -1,
        };
    }

    /// <summary>
    /// Returns the next character and advances past it, updating the line/column position.
    /// </summary>
    /// <exception cref="CompileException">There is no character left to read.</exception>
    private char Consume()
    {
        if (index >= contents.Length)
        {
            throw new CompileException(
                Diagnostic.Error("Unexpected end of file").At(fileName, line, column, 0).Build());
        }

        var c = contents[index];
        if (c == '\n')
        {
            line += 1;
            column = 1;
        }
        else
        {
            column += 1;
        }

        index += 1;
        return c;
    }

    /// <summary>
    /// Returns the character <paramref name="offset"/> positions ahead without consuming it,
    /// or <c>null</c> when that position is past the end of the input.
    /// </summary>
    private char? Peek(int offset = 0)
    {
        if (index + offset >= contents.Length)
        {
            return null;
        }

        return contents[index + offset];
    }

    /// <summary>
    /// Reads the next character without consuming it. Returns <c>false</c> (and sets
    /// <paramref name="c"/> to <c>'\0'</c>) at the end of the input.
    /// </summary>
    private bool TryPeek(out char c)
    {
        if (index >= contents.Length)
        {
            c = '\0';
            return false;
        }

        c = contents[index];
        return true;
    }
}

/// <summary>Base class of all tokens; carries the token's source position and length.</summary>
public abstract class Token(int line, int column, int length)
{
    public int Line { get; } = line;
    public int Column { get; } = column;
    public int Length { get; } = length;
}

/// <summary>An identifier that is not a keyword or boolean literal.</summary>
public sealed class TokenIdent(int line, int column, int length, string ident) : Token(line, column, length)
{
    public string Ident { get; } = ident;
}

/// <summary>An integer literal of arbitrary size.</summary>
public sealed class TokenIntLiteral(int line, int column, int length, BigInteger value) : Token(line, column, length)
{
    public BigInteger Value { get; } = value;
}

/// <summary>A string literal; <see cref="Value"/> is the text between the quotes.</summary>
public sealed class TokenStringLiteral(int line, int column, int length, string value) : Token(line, column, length)
{
    public string Value { get; } = value;
}

/// <summary>The literal <c>true</c> or <c>false</c>.</summary>
public sealed class TokenBoolLiteral(int line, int column, int length, bool value) : Token(line, column, length)
{
    public bool Value { get; } = value;
}

/// <summary>Punctuation and operator symbols recognised by the tokenizer.</summary>
public enum Symbol
{
    OpenCurly,
    CloseCurly,
    OpenParen,
    CloseParen,
    Comma,
    Period,
    Colon,
    ColonColon,
    Caret,
    Bang,
    Equal,
    EqualEqual,
    BangEqual,
    LessThan,
    LessThanLessThan,
    LessThanEqual,
    GreaterThan,
    GreaterThanGreaterThan,
    GreaterThanEqual,
    Plus,
    PlusEqual,
    Minus,
    MinusEqual,
    Star,
    StarEqual,
    ForwardSlash,
    ForwardSlashEqual,
    Percent,
    PercentEqual,
    Ampersand,
    AmpersandAmpersand,
    Pipe,
    PipePipe,
}

/// <summary>A punctuation or operator token.</summary>
public sealed class TokenSymbol(int line, int column, int length, Symbol symbol) : Token(line, column, length)
{
    public Symbol Symbol { get; } = symbol;
}

/// <summary>Reserved words recognised by the tokenizer.</summary>
public enum Keyword
{
    Func,
    Struct,
    Let,
    If,
    Else,
    While,
    Return,
    Module,
}

/// <summary>A reserved-word token.</summary>
public sealed class TokenKeyword(int line, int column, int length, Keyword keyword) : Token(line, column, length)
{
    public Keyword Keyword { get; } = keyword;
}

/// <summary>Maps <see cref="Symbol"/> and <see cref="Keyword"/> values back to their source spelling.</summary>
public static class TokenExtensions
{
    /// <summary>Returns the source spelling of <paramref name="symbol"/>.</summary>
    /// <exception cref="ArgumentOutOfRangeException"><paramref name="symbol"/> is not a defined value.</exception>
    public static string AsString(this Symbol symbol)
    {
        return symbol switch
        {
            Symbol.OpenCurly => "{",
            Symbol.CloseCurly => "}",
            Symbol.OpenParen => "(",
            Symbol.CloseParen => ")",
            Symbol.Comma => ",",
            Symbol.Period => ".",
            Symbol.Colon => ":",
            Symbol.ColonColon => "::",
            Symbol.Caret => "^",
            Symbol.Bang => "!",
            Symbol.Equal => "=",
            Symbol.EqualEqual => "==",
            Symbol.BangEqual => "!=",
            Symbol.LessThan => "<",
            Symbol.LessThanLessThan => "<<",
            Symbol.LessThanEqual => "<=",
            Symbol.GreaterThan => ">",
            Symbol.GreaterThanGreaterThan => ">>",
            Symbol.GreaterThanEqual => ">=",
            Symbol.Plus => "+",
            Symbol.PlusEqual => "+=",
            Symbol.Minus => "-",
            Symbol.MinusEqual => "-=",
            Symbol.Star => "*",
            Symbol.StarEqual => "*=",
            Symbol.ForwardSlash => "/",
            Symbol.ForwardSlashEqual => "/=",
            Symbol.Percent => "%",
            Symbol.PercentEqual => "%=",
            Symbol.Ampersand => "&",
            Symbol.AmpersandAmpersand => "&&",
            Symbol.Pipe => "|",
            Symbol.PipePipe => "||",
            _ => throw new ArgumentOutOfRangeException(nameof(symbol), symbol, null),
        };
    }

    /// <summary>Returns the source spelling of the keyword <paramref name="symbol"/>.</summary>
    /// <exception cref="ArgumentOutOfRangeException"><paramref name="symbol"/> is not a defined value.</exception>
    public static string AsString(this Keyword symbol)
    {
        return symbol switch
        {
            Keyword.Func => "func",
            Keyword.Struct => "struct",
            Keyword.Let => "let",
            Keyword.If => "if",
            Keyword.Else => "else",
            Keyword.While => "while",
            Keyword.Return => "return",
            Keyword.Module => "module",
            _ => throw new ArgumentOutOfRangeException(nameof(symbol), symbol, null),
        };
    }
}