using System.Text;
using NubLang.Common;
using NubLang.Diagnostics;

namespace NubLang.Syntax.Tokenization;

/// <summary>
/// Splits the content of a <see cref="SourceText"/> into keyword/symbol, identifier and literal tokens.
/// Whitespace and <c>//</c> line comments are skipped and never produce tokens.
/// </summary>
public sealed class Tokenizer
{
    /// <summary>Reserved words; an identifier-shaped word found here becomes a symbol token.</summary>
    private static readonly Dictionary Keywords = new()
    {
        ["namespace"] = Symbol.Namespace,
        ["func"] = Symbol.Func,
        ["if"] = Symbol.If,
        ["else"] = Symbol.Else,
        ["while"] = Symbol.While,
        ["break"] = Symbol.Break,
        ["continue"] = Symbol.Continue,
        ["return"] = Symbol.Return,
        ["alloc"] = Symbol.Alloc,
        ["struct"] = Symbol.Struct,
        ["let"] = Symbol.Let,
        ["calls"] = Symbol.Calls,
        ["trait"] = Symbol.Trait,
        ["impl"] = Symbol.Impl,
        ["for"] = Symbol.For,
        ["extern"] = Symbol.Extern
    };

    /// <summary>
    /// Multi-character symbols. These are tried before <see cref="Chars"/> so that, for example,
    /// <c>==</c> is not split into two <c>=</c> tokens.
    /// </summary>
    private static readonly Dictionary Chains = new()
    {
        [['=', '=']] = Symbol.Equal,
        [['!', '=']] = Symbol.NotEqual,
        [['<', '=']] = Symbol.LessThanOrEqual,
        [['>', '=']] = Symbol.GreaterThanOrEqual,
        [[':', ':']] = Symbol.DoubleColon,
    };

    /// <summary>Single-character symbols.</summary>
    private static readonly Dictionary Chars = new()
    {
        [':'] = Symbol.Colon,
        ['('] = Symbol.OpenParen,
        [')'] = Symbol.CloseParen,
        ['{'] = Symbol.OpenBrace,
        ['}'] = Symbol.CloseBrace,
        ['['] = Symbol.OpenBracket,
        [']'] = Symbol.CloseBracket,
        [','] = Symbol.Comma,
        ['.'] = Symbol.Period,
        ['='] = Symbol.Assign,
        ['<'] = Symbol.LessThan,
        ['>'] = Symbol.GreaterThan,
        ['+'] = Symbol.Plus,
        ['-'] = Symbol.Minus,
        ['*'] = Symbol.Star,
        ['/'] = Symbol.ForwardSlash,
        ['!'] = Symbol.Bang,
        ['^'] = Symbol.Caret,
        ['&'] = Symbol.Ampersand,
        [';'] = Symbol.Semi,
    };

    private readonly SourceText _sourceText;

    // Position of the next unread character in _sourceText.Content.
    private int _index;

    /// <summary>Creates a tokenizer over <paramref name="sourceText"/>.</summary>
    /// <param name="sourceText">The source whose <c>Content</c> is tokenized.</param>
    public Tokenizer(SourceText sourceText)
    {
        _sourceText = sourceText;
    }

    /// <summary>
    /// Tokenizes the whole source from the beginning. Each call restarts at index 0.
    /// </summary>
    /// <param name="diagnostics">
    /// Currently always empty: lexical errors are reported by throwing instead (see below).
    /// </param>
    /// <returns>The tokens in source order.</returns>
    /// <exception cref="Exception">
    /// Thrown for an unknown character, an unclosed string literal, or a numeric literal
    /// containing more than one period.
    /// </exception>
    public IEnumerable Tokenize(out IEnumerable diagnostics)
    {
        _index = 0;
        List tokens = [];

        while (ParseToken().TryGetValue(out var token))
        {
            tokens.Add(token);
        }

        // TODO: Implement diagnostics
        diagnostics = [];
        return tokens;
    }

    /// <summary>
    /// Reads the next token, or returns an empty optional once the end of the content is reached.
    /// </summary>
    private Optional ParseToken()
    {
        // Skipped iteratively: recursing once per whitespace character or comment would grow the
        // call stack linearly with the amount of trivia in the source.
        SkipTrivia();

        // Taken after the trivia so the token's span does not include leading whitespace/comments.
        var startIndex = _index;

        if (!Peek().TryGetValue(out var current))
        {
            return Optional.Empty();
        }

        if (char.IsLetter(current) || current == '_')
        {
            var word = ReadWord();

            if (Keywords.TryGetValue(word, out var keywordSymbol))
            {
                return new SymbolToken(CreateSpan(startIndex), keywordSymbol);
            }

            if (word is "true" or "false")
            {
                return new LiteralToken(CreateSpan(startIndex), LiteralKind.Bool, word);
            }

            return new IdentifierToken(CreateSpan(startIndex), word);
        }

        if (char.IsDigit(current))
        {
            var number = ReadNumber(out var isFloat);
            return new LiteralToken(CreateSpan(startIndex), isFloat ? LiteralKind.Float : LiteralKind.Integer, number);
        }

        // TODO: Revisit this
        foreach (var chain in Chains)
        {
            var sequence = chain.Key;
            if (sequence.Length < 2 || current != sequence[0])
            {
                continue;
            }

            var matched = true;
            for (var i = 1; i < sequence.Length; i++)
            {
                if (!Peek(i).TryGetValue(out var ahead) || ahead != sequence[i])
                {
                    matched = false;
                    break;
                }
            }

            if (!matched)
            {
                continue;
            }

            for (var consumed = 0; consumed < sequence.Length; consumed++)
            {
                Next();
            }

            return new SymbolToken(CreateSpan(startIndex), chain.Value);
        }

        if (Chars.TryGetValue(current, out var charSymbol))
        {
            Next();
            return new SymbolToken(CreateSpan(startIndex), charSymbol);
        }

        if (current == '"')
        {
            var text = ReadStringBody();
            return new LiteralToken(CreateSpan(startIndex), LiteralKind.String, text);
        }

        throw new Exception($"Unknown character {current}");
    }

    /// <summary>
    /// Advances past any run of whitespace and <c>//</c> line comments. A comment ends just before
    /// the next <c>'\n'</c> (or at the end of the content); the newline itself is then skipped as whitespace.
    /// </summary>
    private void SkipTrivia()
    {
        while (Peek().TryGetValue(out var current))
        {
            if (char.IsWhiteSpace(current))
            {
                Next();
            }
            else if (current == '/' && Peek(1).TryGetValue(out var following) && following == '/')
            {
                Next();
                Next();

                while (Peek().TryGetValue(out var commentChar) && commentChar != '\n')
                {
                    Next();
                }
            }
            else
            {
                return;
            }
        }
    }

    /// <summary>Consumes and returns a run of letters, digits and underscores.</summary>
    private string ReadWord()
    {
        var word = new StringBuilder();

        while (Peek().TryGetValue(out var next) && (char.IsLetterOrDigit(next) || next == '_'))
        {
            word.Append(next);
            Next();
        }

        return word.ToString();
    }

    /// <summary>
    /// Consumes and returns a run of digits containing at most one period.
    /// </summary>
    /// <param name="isFloat">Set to <c>true</c> when a period was consumed.</param>
    /// <exception cref="Exception">Thrown when a second period is encountered.</exception>
    private string ReadNumber(out bool isFloat)
    {
        isFloat = false;
        var digits = new StringBuilder();

        while (Peek().TryGetValue(out var next))
        {
            if (next == '.')
            {
                if (isFloat)
                {
                    throw new Exception("More than one period found in float literal");
                }

                isFloat = true;
            }
            else if (!char.IsDigit(next))
            {
                break;
            }

            digits.Append(next);
            Next();
        }

        return digits.ToString();
    }

    /// <summary>
    /// Consumes a double-quoted string, including both quotes, and returns the characters between them.
    /// No escape sequences are interpreted. Must be called with the opening quote as the current character.
    /// </summary>
    /// <exception cref="Exception">Thrown when the content ends before a closing quote.</exception>
    private string ReadStringBody()
    {
        Next(); // opening quote

        var text = new StringBuilder();

        while (true)
        {
            if (!Peek().TryGetValue(out var next))
            {
                throw new Exception("Unclosed string literal");
            }

            Next();

            if (next == '"')
            {
                return text.ToString();
            }

            text.Append(next);
        }
    }

    /// <summary>
    /// Converts a character index into a 1-based line/column pair by scanning the content from the start.
    /// </summary>
    /// <param name="index">Index into the content; values past the end are clamped to the end.</param>
    private SourceLocation CreateLocation(int index)
    {
        var line = 1;
        var column = 1;

        // Clamp to Length (not Length - 1): an index equal to Length is the position just past the
        // final character, so that character must still be counted. The loop only reads indices < Length.
        var end = Math.Min(index, _sourceText.Content.Length);

        for (var i = 0; i < end; i++)
        {
            if (_sourceText.Content[i] == '\n')
            {
                column = 1;
                line += 1;
            }
            else
            {
                column += 1;
            }
        }

        return new SourceLocation(line, column);
    }

    /// <summary>Builds a span from <paramref name="startIndex"/> to the current read position.</summary>
    private SourceSpan CreateSpan(int startIndex)
    {
        return new SourceSpan(_sourceText, CreateLocation(startIndex), CreateLocation(_index));
    }

    /// <summary>
    /// Returns the character <paramref name="offset"/> positions ahead of the read position without
    /// consuming it, or an empty optional when that position is at or past the end of the content.
    /// </summary>
    private Optional Peek(int offset = 0)
    {
        if (_index + offset < _sourceText.Content.Length)
        {
            return _sourceText.Content[_index + offset];
        }

        return Optional.Empty();
    }

    /// <summary>
    /// Consumes and returns the current character, or an empty optional at the end of the content.
    /// The read position is advanced in both cases.
    /// </summary>
    private Optional Next()
    {
        if (_index < _sourceText.Content.Length)
        {
            return _sourceText.Content[_index++];
        }

        _index++;
        return Optional.Empty();
    }
}