using System.Diagnostics.CodeAnalysis;
using NubLang.Diagnostics;

namespace NubLang.Syntax;

/// <summary>
/// Recursive-descent parser that turns a flat list of tokens into a <see cref="SyntaxTree"/>.
/// Syntax errors raised as <see cref="CompileException"/> are caught at the top-level and
/// block level, recorded in <see cref="Diagnostics"/>, and parsing then continues.
/// </summary>
public sealed class Parser
{
    private List<Token> _tokens = [];
    private int _tokenIndex;

    /// <summary>The token at the cursor, or <c>null</c> once the cursor is past the last token.</summary>
    private Token? CurrentToken => _tokenIndex < _tokens.Count ? _tokens[_tokenIndex] : null;

    private bool HasToken => CurrentToken is not null;

    /// <summary>Diagnostics collected by the most recent call to <see cref="Parse"/>.</summary>
    public List<Diagnostic> Diagnostics { get; } = [];

    /// <summary>
    /// Parses <paramref name="tokens"/> into a syntax tree. Clears <see cref="Diagnostics"/> first.
    /// </summary>
    /// <param name="tokens">The tokens to parse; the list is stored and read, not copied.</param>
    /// <returns>A tree containing every top-level node that parsed without error.</returns>
    public SyntaxTree Parse(List<Token> tokens)
    {
        Diagnostics.Clear();
        _tokens = tokens;
        _tokenIndex = 0;

        var topLevelSyntaxNodes = new List<TopLevelSyntaxNode>();

        while (HasToken)
        {
            try
            {
                var startIndex = _tokenIndex;

                var exported = TryExpectSymbol(Symbol.Export);
                var packed = TryExpectSymbol(Symbol.Packed);

                if (TryExpectSymbol(Symbol.Extern))
                {
                    var externSymbol = ExpectStringLiteral();
                    ExpectSymbol(Symbol.Func);
                    topLevelSyntaxNodes.Add(ParseFunc(startIndex, exported, externSymbol));
                    continue;
                }

                var keyword = ExpectSymbol();
                TopLevelSyntaxNode definition = keyword.Symbol switch
                {
                    Symbol.Module => ParseModule(startIndex),
                    Symbol.Import => ParseImport(startIndex),
                    Symbol.Func => ParseFunc(startIndex, exported, null),
                    Symbol.Struct => ParseStruct(startIndex, exported, packed),
                    Symbol.Enum => ParseEnum(startIndex, exported),
                    _ => throw new CompileException(Diagnostic
                        .Error($"Expected 'func', 'struct', 'enum', 'import' or 'module' but found '{keyword.Symbol}'")
                        .WithHelp("Valid top level statements are 'func', 'struct', 'enum', 'import' and 'module'")
                        .At(keyword)
                        .Build())
                };

                topLevelSyntaxNodes.Add(definition);
            }
            catch (CompileException e)
            {
                Diagnostics.Add(e.Diagnostic);
                SkipToNextTopLevelDeclaration();
            }
        }

        return new SyntaxTree(topLevelSyntaxNodes);
    }

    /// <summary>
    /// Error recovery: advances the cursor until it sits on a symbol that can begin a top-level
    /// declaration (including the <c>export</c>/<c>packed</c> modifiers, so they are not lost), or
    /// until the input is exhausted.
    /// </summary>
    private void SkipToNextTopLevelDeclaration()
    {
        // Every iteration of the loop in Parse consumes at least one token before it can throw,
        // so stopping on a token without consuming it here cannot stall the parser.
        while (HasToken)
        {
            if (CurrentToken is SymbolToken
                {
                    Symbol: Symbol.Export or Symbol.Packed or Symbol.Extern or Symbol.Func
                        or Symbol.Struct or Symbol.Enum or Symbol.Module or Symbol.Import
                })
            {
                break;
            }

            Next();
        }
    }

    /// <summary>Parses the identifier following an already-consumed <c>import</c> keyword.</summary>
    private ImportSyntax ParseImport(int startIndex)
    {
        var name = ExpectIdentifier();
        return new ImportSyntax(GetTokens(startIndex), name);
    }

    /// <summary>Parses the identifier following an already-consumed <c>module</c> keyword.</summary>
    private ModuleSyntax ParseModule(int startIndex)
    {
        var name = ExpectIdentifier();
        return new ModuleSyntax(GetTokens(startIndex), name);
    }

    /// <summary>Parses a single <c>name: type</c> function parameter.</summary>
    private FuncParameterSyntax ParseFuncParameter()
    {
        var startIndex = _tokenIndex;
        var name = ExpectIdentifier();
        ExpectSymbol(Symbol.Colon);
        var type = ParseType();
        return new FuncParameterSyntax(GetTokens(startIndex), name, type);
    }

    /// <summary>
    /// Parses a function after its <c>func</c> keyword: name, parameter list, optional
    /// <c>: returnType</c> (void when omitted) and an optional brace-delimited body.
    /// </summary>
    /// <param name="startIndex">Index of the first token of the declaration, modifiers included.</param>
    /// <param name="exported">Whether the declaration was prefixed with <c>export</c>.</param>
    /// <param name="externSymbol">The string literal following <c>extern</c>, or <c>null</c>.</param>
    private FuncSyntax ParseFunc(int startIndex, bool exported, StringLiteralToken? externSymbol)
    {
        var name = ExpectIdentifier();
        List<FuncParameterSyntax> parameters = [];

        ExpectSymbol(Symbol.OpenParen);
        while (!TryExpectSymbol(Symbol.CloseParen))
        {
            parameters.Add(ParseFuncParameter());

            // A parameter is followed either by ',' (more may follow) or by the closing ')'.
            if (!TryExpectSymbol(Symbol.Comma))
            {
                ExpectSymbol(Symbol.CloseParen);
                break;
            }
        }

        var returnType = TryExpectSymbol(Symbol.Colon) ? ParseType() : new VoidTypeSyntax([]);
        var prototype = new FuncPrototypeSyntax(GetTokens(startIndex), name, exported, externSymbol, parameters, returnType);

        BlockSyntax? body = null;
        var bodyStartIndex = _tokenIndex;
        if (TryExpectSymbol(Symbol.OpenBrace))
        {
            body = ParseBlock(bodyStartIndex);
        }

        return new FuncSyntax(GetTokens(startIndex), prototype, body);
    }

    /// <summary>
    /// Parses a struct after its <c>struct</c> keyword: name and a brace-delimited list of
    /// <c>name: type</c> fields, each with an optional <c>= expression</c> value.
    /// </summary>
    private StructSyntax ParseStruct(int startIndex, bool exported, bool packed)
    {
        var name = ExpectIdentifier();
        ExpectSymbol(Symbol.OpenBrace);

        List<StructFieldSyntax> fields = [];
        while (!TryExpectSymbol(Symbol.CloseBrace))
        {
            var memberStartIndex = _tokenIndex;
            var fieldName = ExpectIdentifier();
            ExpectSymbol(Symbol.Colon);
            var fieldType = ParseType();

            ExpressionSyntax? fieldValue = null;
            if (TryExpectSymbol(Symbol.Assign))
            {
                fieldValue = ParseExpression();
            }

            fields.Add(new StructFieldSyntax(GetTokens(memberStartIndex), fieldName, fieldType, fieldValue));
        }

        return new StructSyntax(GetTokens(startIndex), name, exported, packed, fields);
    }

    /// <summary>
    /// Parses an enum after its <c>enum</c> keyword: name, optional <c>: type</c>, and a
    /// brace-delimited list of fields, each with an optional <c>= integer-literal</c> value.
    /// </summary>
    private EnumSyntax ParseEnum(int startIndex, bool exported)
    {
        var name = ExpectIdentifier();

        TypeSyntax? type = null;
        if (TryExpectSymbol(Symbol.Colon))
        {
            type = ParseType();
        }

        List<EnumFieldSyntax> fields = [];
        ExpectSymbol(Symbol.OpenBrace);
        while (!TryExpectSymbol(Symbol.CloseBrace))
        {
            var memberStartIndex = _tokenIndex;
            var fieldName = ExpectIdentifier();

            IntLiteralToken? value = null;
            if (TryExpectSymbol(Symbol.Assign))
            {
                if (!TryExpectIntLiteral(out var intLiteralToken))
                {
                    throw new CompileException(Diagnostic
                        .Error("Value of enum field must be an integer literal")
                        .At(CurrentToken)
                        .Build());
                }

                value = intLiteralToken;
            }

            fields.Add(new EnumFieldSyntax(GetTokens(memberStartIndex), fieldName, value));
        }

        return new EnumSyntax(GetTokens(startIndex), name, exported, type, fields);
    }

    /// <summary>
    /// Parses one statement: a keyword statement, a nested block, an assignment
    /// (<c>expr = expr</c>) or a bare expression.
    /// </summary>
    private StatementSyntax ParseStatement()
    {
        var startIndex = _tokenIndex;

        if (TryExpectSymbol(out var symbol))
        {
            switch (symbol)
            {
                case Symbol.OpenBrace:
                    return ParseBlock(startIndex);
                case Symbol.Return:
                    return ParseReturn(startIndex);
                case Symbol.If:
                    return ParseIf(startIndex);
                case Symbol.While:
                    return ParseWhile(startIndex);
                case Symbol.For:
                    return ParseFor(startIndex);
                case Symbol.Let:
                    return ParseVariableDeclaration(startIndex);
                case Symbol.Defer:
                    return ParseDefer(startIndex);
                case Symbol.Break:
                    return new BreakSyntax(GetTokens(startIndex));
                case Symbol.Continue:
                    return new ContinueSyntax(GetTokens(startIndex));
                default:
                    // Not a statement keyword, so the symbol belongs to an expression
                    // (e.g. '(', '-', '!', '&', '@'). Put it back so ParseExpression sees it
                    // instead of silently dropping the first token of the statement.
                    _tokenIndex = startIndex;
                    break;
            }
        }

        var expr = ParseExpression();

        if (TryExpectSymbol(Symbol.Assign))
        {
            var value = ParseExpression();
            return new AssignmentSyntax(GetTokens(startIndex), expr, value);
        }

        return new StatementExpressionSyntax(GetTokens(startIndex), expr);
    }

    /// <summary>
    /// Parses a declaration after its <c>let</c> keyword: name, optional <c>: type</c> and
    /// optional <c>= expression</c>.
    /// </summary>
    private VariableDeclarationSyntax ParseVariableDeclaration(int startIndex)
    {
        var name = ExpectIdentifier();

        TypeSyntax? explicitType = null;
        if (TryExpectSymbol(Symbol.Colon))
        {
            explicitType = ParseType();
        }

        ExpressionSyntax? assignment = null;
        if (TryExpectSymbol(Symbol.Assign))
        {
            assignment = ParseExpression();
        }

        return new VariableDeclarationSyntax(GetTokens(startIndex), name, explicitType, assignment);
    }

    /// <summary>Parses the statement following an already-consumed <c>defer</c> keyword.</summary>
    private DeferSyntax ParseDefer(int startIndex)
    {
        var statement = ParseStatement();
        return new DeferSyntax(GetTokens(startIndex), statement);
    }

    /// <summary>
    /// Parses the remainder of a <c>return</c> statement. A directly following ';' means no
    /// value; anything else is parsed as the returned expression.
    /// </summary>
    private ReturnSyntax ParseReturn(int startIndex)
    {
        ExpressionSyntax? value = null;
        if (!TryExpectSymbol(Symbol.Semi))
        {
            value = ParseExpression();
        }

        return new ReturnSyntax(GetTokens(startIndex), value);
    }

    /// <summary>
    /// Parses the remainder of an <c>if</c> statement: condition, block, and an optional
    /// <c>else</c> that is either another <c>if</c> (parsed recursively) or a block.
    /// </summary>
    private IfSyntax ParseIf(int startIndex)
    {
        var condition = ParseExpression();
        var body = ParseBlock();

        // NOTE(review): the Variant type arguments are reconstructed from the two values stored
        // here (the results of ParseIf and ParseBlock) - confirm against IfSyntax's declaration.
        Variant<IfSyntax, BlockSyntax>? elseStatement = null;

        var elseStartIndex = _tokenIndex;
        if (TryExpectSymbol(Symbol.Else))
        {
            if (TryExpectSymbol(Symbol.If))
            {
                elseStatement = (Variant<IfSyntax, BlockSyntax>)ParseIf(elseStartIndex);
            }
            else
            {
                elseStatement = (Variant<IfSyntax, BlockSyntax>)ParseBlock();
            }
        }

        return new IfSyntax(GetTokens(startIndex), condition, body, elseStatement);
    }

    /// <summary>Parses the condition and block following an already-consumed <c>while</c> keyword.</summary>
    private WhileSyntax ParseWhile(int startIndex)
    {
        var condition = ParseExpression();
        var body = ParseBlock();
        return new WhileSyntax(GetTokens(startIndex), condition, body);
    }

    /// <summary>
    /// Parses the remainder of a <c>for</c> statement: <c>item [, index] in target { ... }</c>.
    /// </summary>
    private ForSyntax ParseFor(int startIndex)
    {
        var itemName = ExpectIdentifier();

        IdentifierToken? indexName = null;
        if (TryExpectSymbol(Symbol.Comma))
        {
            indexName = ExpectIdentifier();
        }

        ExpectSymbol(Symbol.In);
        var target = ParseExpression();
        var body = ParseBlock();

        return new ForSyntax(GetTokens(startIndex), itemName, indexName, target, body);
    }

    /// <summary>
    /// Parses a binary expression, folding in operators whose precedence is at least
    /// <paramref name="precedence"/>.
    /// </summary>
    /// <param name="precedence">Minimum operator precedence this call is allowed to consume.</param>
    private ExpressionSyntax ParseExpression(int precedence = 0)
    {
        var startIndex = _tokenIndex;
        var left = ParsePrimaryExpression();

        while (CurrentToken is SymbolToken symbolToken
               && TryGetBinaryOperator(symbolToken.Symbol, out var op)
               && GetBinaryOperatorPrecedence(op.Value) >= precedence)
        {
            Next();

            // Requiring strictly higher precedence on the right makes equal-precedence
            // operators group to the left.
            var right = ParseExpression(GetBinaryOperatorPrecedence(op.Value) + 1);
            left = new BinaryExpressionSyntax(GetTokens(startIndex), left, op.Value, right);
        }

        return left;
    }

    /// <summary>Returns the binding strength of a binary operator; larger values bind tighter.</summary>
    /// <exception cref="ArgumentOutOfRangeException">The operator has no precedence assigned.</exception>
    private static int GetBinaryOperatorPrecedence(BinaryOperatorSyntax operatorSyntax)
    {
        return operatorSyntax switch
        {
            BinaryOperatorSyntax.Multiply or BinaryOperatorSyntax.Divide or BinaryOperatorSyntax.Modulo => 10,
            BinaryOperatorSyntax.Plus or BinaryOperatorSyntax.Minus => 9,
            BinaryOperatorSyntax.LeftShift or BinaryOperatorSyntax.RightShift => 8,
            BinaryOperatorSyntax.GreaterThan or BinaryOperatorSyntax.GreaterThanOrEqual
                or BinaryOperatorSyntax.LessThan or BinaryOperatorSyntax.LessThanOrEqual
                or BinaryOperatorSyntax.Equal or BinaryOperatorSyntax.NotEqual => 7,
            BinaryOperatorSyntax.BitwiseAnd => 6,
            BinaryOperatorSyntax.BitwiseXor => 5,
            BinaryOperatorSyntax.BitwiseOr => 4,
            BinaryOperatorSyntax.LogicalAnd => 3,
            BinaryOperatorSyntax.LogicalOr => 2,
            _ => throw new ArgumentOutOfRangeException(nameof(operatorSyntax), operatorSyntax, null)
        };
    }

    /// <summary>Maps a symbol to the binary operator it denotes, if any.</summary>
    /// <returns><c>true</c> when <paramref name="symbol"/> is a binary operator.</returns>
    private bool TryGetBinaryOperator(Symbol symbol, [NotNullWhen(true)] out BinaryOperatorSyntax? binaryExpressionOperator)
    {
        binaryExpressionOperator = symbol switch
        {
            Symbol.Equal => BinaryOperatorSyntax.Equal,
            Symbol.NotEqual => BinaryOperatorSyntax.NotEqual,
            Symbol.LessThan => BinaryOperatorSyntax.LessThan,
            Symbol.LessThanOrEqual => BinaryOperatorSyntax.LessThanOrEqual,
            Symbol.GreaterThan => BinaryOperatorSyntax.GreaterThan,
            Symbol.GreaterThanOrEqual => BinaryOperatorSyntax.GreaterThanOrEqual,
            Symbol.And => BinaryOperatorSyntax.LogicalAnd,
            Symbol.Or => BinaryOperatorSyntax.LogicalOr,
            Symbol.Plus => BinaryOperatorSyntax.Plus,
            Symbol.Minus => BinaryOperatorSyntax.Minus,
            Symbol.Star => BinaryOperatorSyntax.Multiply,
            Symbol.ForwardSlash => BinaryOperatorSyntax.Divide,
            Symbol.Percent => BinaryOperatorSyntax.Modulo,
            Symbol.LeftShift => BinaryOperatorSyntax.LeftShift,
            Symbol.RightShift => BinaryOperatorSyntax.RightShift,
            Symbol.Ampersand => BinaryOperatorSyntax.BitwiseAnd,
            Symbol.Pipe => BinaryOperatorSyntax.BitwiseOr,
            Symbol.XOr => BinaryOperatorSyntax.BitwiseXor,
            _ => (BinaryOperatorSyntax?)null
        };

        return binaryExpressionOperator is not null;
    }

    /// <summary>
    /// Parses a primary expression (literal, identifier, prefix operator, parenthesized
    /// expression, array/struct initializer or builtin) followed by any postfix operators.
    /// </summary>
    private ExpressionSyntax ParsePrimaryExpression()
    {
        var startIndex = _tokenIndex;
        var token = ExpectToken();

        ExpressionSyntax expr = token switch
        {
            BoolLiteralToken boolLiteral => new BoolLiteralSyntax(GetTokens(startIndex), boolLiteral),
            StringLiteralToken stringLiteral => new StringLiteralSyntax(GetTokens(startIndex), stringLiteral),
            FloatLiteralToken floatLiteral => new FloatLiteralSyntax(GetTokens(startIndex), floatLiteral),
            IntLiteralToken intLiteral => new IntLiteralSyntax(GetTokens(startIndex), intLiteral),
            IdentifierToken identifier => ParseIdentifier(startIndex, identifier),
            SymbolToken symbolToken => symbolToken.Symbol switch
            {
                Symbol.Ampersand => ParseAddressOf(startIndex),
                Symbol.OpenParen => ParseParenthesizedExpression(),
                Symbol.Minus => ParseUnary(startIndex, UnaryOperatorSyntax.Negate),
                Symbol.Bang => ParseUnary(startIndex, UnaryOperatorSyntax.Invert),
                Symbol.OpenBracket => ParseArrayInitializer(startIndex),
                Symbol.OpenBrace => ParseAnonymousStructInitializer(startIndex),
                Symbol.Struct => ParseStructInitializer(startIndex),
                Symbol.At => ParseBuiltinFunction(startIndex),
                _ => throw new CompileException(Diagnostic
                    .Error($"Unexpected symbol '{symbolToken.Symbol}' in expression")
                    .WithHelp("Expected '(', '-', '!', '[' or '{'")
                    .At(symbolToken)
                    .Build())
            },
            _ => throw new CompileException(Diagnostic
                .Error($"Unexpected token '{token.GetType().Name}' in expression")
                .WithHelp("Expected literal, identifier, or parenthesized expression")
                .At(token)
                .Build())
        };

        return ParsePostfixOperators(expr, startIndex);
    }

    /// <summary>Parses the operand of an already-consumed <c>&amp;</c>.</summary>
    private ExpressionSyntax ParseAddressOf(int startIndex)
    {
        // Parse the operand BEFORE capturing tokens so the node's tokens include it.
        var target = ParsePrimaryExpression();
        return new AddressOfSyntax(GetTokens(startIndex), target);
    }

    /// <summary>Parses the operand of an already-consumed prefix operator.</summary>
    private ExpressionSyntax ParseUnary(int startIndex, UnaryOperatorSyntax unaryOperator)
    {
        // Parse the operand BEFORE capturing tokens so the node's tokens include it.
        var operand = ParsePrimaryExpression();
        return new UnaryExpressionSyntax(GetTokens(startIndex), unaryOperator, operand);
    }

    /// <summary>Parses the body of an untyped <c>{ ... }</c> struct initializer whose '{' is already consumed.</summary>
    private ExpressionSyntax ParseAnonymousStructInitializer(int startIndex)
    {
        // Parse the body BEFORE capturing tokens so the node's tokens include it.
        var initializers = ParseStructInitializerBody();
        return new StructInitializerSyntax(GetTokens(startIndex), null, initializers);
    }

    /// <summary>
    /// Parses a call to a builtin after its <c>@</c>: <c>size(type)</c> or <c>cast(expression)</c>.
    /// </summary>
    /// <exception cref="CompileException">The builtin name is not recognised.</exception>
    private ExpressionSyntax ParseBuiltinFunction(int startIndex)
    {
        var name = ExpectIdentifier();
        ExpectSymbol(Symbol.OpenParen);

        switch (name.Value)
        {
            case "size":
            {
                var type = ParseType();
                ExpectSymbol(Symbol.CloseParen);
                return new SizeSyntax(GetTokens(startIndex), type);
            }
            case "cast":
            {
                var expression = ParseExpression();
                ExpectSymbol(Symbol.CloseParen);
                return new CastSyntax(GetTokens(startIndex), expression);
            }
            default:
            {
                throw new CompileException(Diagnostic.Error($"Unknown builtin {name.Value}").At(name).Build());
            }
        }
    }

    /// <summary>
    /// Builds an identifier expression: <c>module::name</c> when a '::' follows, otherwise a
    /// local identifier.
    /// </summary>
    private ExpressionSyntax ParseIdentifier(int startIndex, IdentifierToken identifier)
    {
        if (TryExpectSymbol(Symbol.DoubleColon))
        {
            var name = ExpectIdentifier();
            return new ModuleIdentifierSyntax(GetTokens(startIndex), identifier, name);
        }

        return new LocalIdentifierSyntax(GetTokens(startIndex), identifier);
    }

    /// <summary>Parses the expression and closing ')' following an already-consumed '('.</summary>
    private ExpressionSyntax ParseParenthesizedExpression()
    {
        var expression = ParseExpression();
        ExpectSymbol(Symbol.CloseParen);
        return expression;
    }

    /// <summary>
    /// Wraps <paramref name="expr"/> in any trailing postfix operators: dereference (<c>^</c>),
    /// member access (<c>.name</c>), indexing (<c>[expr]</c>) and calls (<c>(args)</c>).
    /// </summary>
    /// <param name="expr">The expression the postfix operators apply to.</param>
    /// <param name="startIndex">
    /// Index of the first token of <paramref name="expr"/>, so each postfix node's tokens
    /// include the expression it operates on.
    /// </param>
    private ExpressionSyntax ParsePostfixOperators(ExpressionSyntax expr, int startIndex)
    {
        while (HasToken)
        {
            if (TryExpectSymbol(Symbol.Caret))
            {
                expr = new DereferenceSyntax(GetTokens(startIndex), expr);
                continue;
            }

            if (TryExpectSymbol(Symbol.Period))
            {
                var member = ExpectIdentifier();
                expr = new MemberAccessSyntax(GetTokens(startIndex), expr, member);
                continue;
            }

            if (TryExpectSymbol(Symbol.OpenBracket))
            {
                var index = ParseExpression();
                ExpectSymbol(Symbol.CloseBracket);
                expr = new ArrayIndexAccessSyntax(GetTokens(startIndex), expr, index);
                continue;
            }

            if (TryExpectSymbol(Symbol.OpenParen))
            {
                var parameters = new List<ExpressionSyntax>();

                while (!TryExpectSymbol(Symbol.CloseParen))
                {
                    parameters.Add(ParseExpression());
                    if (!TryExpectSymbol(Symbol.Comma))
                    {
                        ExpectSymbol(Symbol.CloseParen);
                        break;
                    }
                }

                expr = new FuncCallSyntax(GetTokens(startIndex), expr, parameters);
                continue;
            }

            break;
        }

        return expr;
    }

    /// <summary>Parses the comma-separated values and closing ']' following an already-consumed '['.</summary>
    private ExpressionSyntax ParseArrayInitializer(int startIndex)
    {
        var values = new List<ExpressionSyntax>();

        while (!TryExpectSymbol(Symbol.CloseBracket))
        {
            values.Add(ParseExpression());
            if (!TryExpectSymbol(Symbol.Comma))
            {
                ExpectSymbol(Symbol.CloseBracket);
                break;
            }
        }

        return new ArrayInitializerSyntax(GetTokens(startIndex), values);
    }

    /// <summary>
    /// Parses a struct initializer after its <c>struct</c> keyword: an optional type followed
    /// by a brace-delimited body.
    /// </summary>
    private StructInitializerSyntax ParseStructInitializer(int startIndex)
    {
        TypeSyntax? type = null;
        if (!TryExpectSymbol(Symbol.OpenBrace))
        {
            type = ParseType();
            ExpectSymbol(Symbol.OpenBrace);
        }

        var initializers = ParseStructInitializerBody();

        return new StructInitializerSyntax(GetTokens(startIndex), type, initializers);
    }

    /// <summary>
    /// Parses <c>name = expression</c> entries up to and including the closing '}'.
    /// </summary>
    private Dictionary<IdentifierToken, ExpressionSyntax> ParseStructInitializerBody()
    {
        Dictionary<IdentifierToken, ExpressionSyntax> initializers = [];

        while (!TryExpectSymbol(Symbol.CloseBrace))
        {
            var name = ExpectIdentifier();
            ExpectSymbol(Symbol.Assign);
            var value = ParseExpression();
            initializers.Add(name, value);
        }

        return initializers;
    }

    /// <summary>Expects a '{' and parses the block it opens.</summary>
    private BlockSyntax ParseBlock()
    {
        var startIndex = _tokenIndex;
        ExpectSymbol(Symbol.OpenBrace);
        return ParseBlock(startIndex);
    }

    /// <summary>
    /// Parses statements up to and including the closing '}' of a block whose '{' is already
    /// consumed. A statement that fails to parse is recorded in <see cref="Diagnostics"/>,
    /// one token is skipped, and parsing resumes.
    /// </summary>
    /// <param name="startIndex">Index of the first token of the block.</param>
    private BlockSyntax ParseBlock(int startIndex)
    {
        List<StatementSyntax> statements = [];

        while (!TryExpectSymbol(Symbol.CloseBrace))
        {
            try
            {
                statements.Add(ParseStatement());
            }
            catch (CompileException ex)
            {
                Diagnostics.Add(ex.Diagnostic);

                if (HasToken)
                {
                    Next();
                }
                else
                {
                    // End of input without a closing '}': stop instead of looping forever.
                    break;
                }
            }
        }

        return new BlockSyntax(GetTokens(startIndex), statements);
    }

    /// <summary>
    /// Parses a type: sized integer/float names (<c>u8</c>, <c>i32</c>, <c>f64</c>, ...),
    /// <c>void</c>, <c>string</c>, <c>bool</c>, a custom or <c>module::name</c> type, a pointer
    /// (<c>^T</c>), a function type (<c>func(T, ...): R</c>) or an array form
    /// (<c>[N]T</c>, <c>[?]T</c>, <c>[]T</c>).
    /// </summary>
    /// <exception cref="CompileException">The tokens at the cursor do not form a type.</exception>
    private TypeSyntax ParseType()
    {
        var startIndex = _tokenIndex;

        if (TryExpectIdentifier(out var name))
        {
            if (name.Value[0] == 'u' && ulong.TryParse(name.Value[1..], out var size))
            {
                if (size is not 8 and not 16 and not 32 and not 64)
                {
                    throw new CompileException(Diagnostic
                        .Error("Arbitrary uint size is not supported")
                        .WithHelp("Use u8, u16, u32 or u64")
                        .At(name)
                        .Build());
                }

                return new IntTypeSyntax(GetTokens(startIndex), false, size);
            }

            if (name.Value[0] == 'i' && ulong.TryParse(name.Value[1..], out size))
            {
                if (size is not 8 and not 16 and not 32 and not 64)
                {
                    throw new CompileException(Diagnostic
                        .Error("Arbitrary int size is not supported")
                        .WithHelp("Use i8, i16, i32 or i64")
                        .At(name)
                        .Build());
                }

                return new IntTypeSyntax(GetTokens(startIndex), true, size);
            }

            if (name.Value[0] == 'f' && ulong.TryParse(name.Value[1..], out size))
            {
                if (size is not 32 and not 64)
                {
                    throw new CompileException(Diagnostic
                        .Error("Arbitrary float size is not supported")
                        .WithHelp("Use f32 or f64")
                        .At(name)
                        .Build());
                }

                return new FloatTypeSyntax(GetTokens(startIndex), size);
            }

            switch (name.Value)
            {
                case "void":
                    return new VoidTypeSyntax(GetTokens(startIndex));
                case "string":
                    return new StringTypeSyntax(GetTokens(startIndex));
                case "bool":
                    return new BoolTypeSyntax(GetTokens(startIndex));
                default:
                {
                    IdentifierToken? module = null;

                    // "a::b" - the first identifier was the module, the second is the type name.
                    if (TryExpectSymbol(Symbol.DoubleColon))
                    {
                        var customTypeName = ExpectIdentifier();
                        module = name;
                        name = customTypeName;
                    }

                    return new CustomTypeSyntax(GetTokens(startIndex), module, name);
                }
            }
        }

        if (TryExpectSymbol(Symbol.Caret))
        {
            var baseType = ParseType();
            return new PointerTypeSyntax(GetTokens(startIndex), baseType);
        }

        if (TryExpectSymbol(Symbol.Func))
        {
            ExpectSymbol(Symbol.OpenParen);

            List<TypeSyntax> parameters = [];
            while (!TryExpectSymbol(Symbol.CloseParen))
            {
                parameters.Add(ParseType());
                if (!TryExpectSymbol(Symbol.Comma))
                {
                    ExpectSymbol(Symbol.CloseParen);
                    break;
                }
            }

            var returnType = TryExpectSymbol(Symbol.Colon) ? ParseType() : new VoidTypeSyntax([]);

            return new FuncTypeSyntax(GetTokens(startIndex), parameters, returnType);
        }

        if (TryExpectSymbol(Symbol.OpenBracket))
        {
            if (TryExpectIntLiteral(out var intLiteral))
            {
                ExpectSymbol(Symbol.CloseBracket);
                var baseType = ParseType();
                return new ConstArrayTypeSyntax(GetTokens(startIndex), baseType, intLiteral.AsU64);
            }
            else if (TryExpectSymbol(Symbol.QuestionMark))
            {
                ExpectSymbol(Symbol.CloseBracket);
                var baseType = ParseType();
                return new ArrayTypeSyntax(GetTokens(startIndex), baseType);
            }
            else
            {
                ExpectSymbol(Symbol.CloseBracket);
                var baseType = ParseType();
                return new SliceTypeSyntax(GetTokens(startIndex), baseType);
            }
        }

        throw new CompileException(Diagnostic
            .Error("Invalid type syntax")
            .WithHelp("Expected type name, '^' for pointer, or '[]' for array")
            .At(CurrentToken)
            .Build());
    }

    /// <summary>Consumes and returns the current token.</summary>
    /// <exception cref="CompileException">There are no more tokens.</exception>
    private Token ExpectToken()
    {
        if (!HasToken)
        {
            throw new CompileException(Diagnostic
                .Error("Unexpected end of file")
                .WithHelp("Expected more tokens to complete the syntax")
                .At(_tokens[^1])
                .Build());
        }

        var token = CurrentToken!;
        Next();
        return token;
    }

    /// <summary>Consumes the current token and returns it as a symbol.</summary>
    /// <exception cref="CompileException">The token is not a symbol, or input has ended.</exception>
    private SymbolToken ExpectSymbol()
    {
        var token = ExpectToken();
        if (token is not SymbolToken symbol)
        {
            throw new CompileException(Diagnostic
                .Error($"Expected symbol, but found {token.GetType().Name}")
                .WithHelp("This position requires a symbol like '(', ')', '{', '}', etc.")
                .At(token)
                .Build());
        }

        return symbol;
    }

    /// <summary>Consumes the current token and requires it to be <paramref name="expectedSymbol"/>.</summary>
    /// <exception cref="CompileException">The token is a different symbol or not a symbol at all.</exception>
    private void ExpectSymbol(Symbol expectedSymbol)
    {
        var token = ExpectSymbol();
        if (token.Symbol != expectedSymbol)
        {
            throw new CompileException(Diagnostic
                .Error($"Expected '{expectedSymbol}', but found '{token.Symbol}'")
                .WithHelp($"Insert '{expectedSymbol}' here")
                .At(token)
                .Build());
        }
    }

    /// <summary>
    /// Consumes the current token if it is any symbol and reports which one. The cursor is
    /// left untouched when the current token is not a symbol.
    /// </summary>
    private bool TryExpectSymbol(out Symbol symbol)
    {
        if (CurrentToken is SymbolToken symbolToken)
        {
            Next();
            symbol = symbolToken.Symbol;
            return true;
        }

        symbol = default;
        return false;
    }

    /// <summary>Consumes the current token only if it is exactly <paramref name="symbol"/>.</summary>
    private bool TryExpectSymbol(Symbol symbol)
    {
        if (CurrentToken is SymbolToken symbolToken && symbolToken.Symbol == symbol)
        {
            Next();
            return true;
        }

        return false;
    }

    /// <summary>Consumes the current token only if it is an identifier.</summary>
    private bool TryExpectIdentifier([NotNullWhen(true)] out IdentifierToken? identifier)
    {
        if (CurrentToken is IdentifierToken identifierToken)
        {
            identifier = identifierToken;
            Next();
            return true;
        }

        identifier = null;
        return false;
    }

    /// <summary>Consumes the current token and returns it as an identifier.</summary>
    /// <exception cref="CompileException">The token is not an identifier, or input has ended.</exception>
    private IdentifierToken ExpectIdentifier()
    {
        var token = ExpectToken();
        if (token is not IdentifierToken identifier)
        {
            throw new CompileException(Diagnostic
                .Error($"Expected identifier, but found {token.GetType().Name}")
                .WithHelp("Provide a valid identifier name here")
                .At(token)
                .Build());
        }

        return identifier;
    }

    /// <summary>Consumes the current token only if it is an integer literal.</summary>
    private bool TryExpectIntLiteral([NotNullWhen(true)] out IntLiteralToken? intLiteral)
    {
        if (CurrentToken is IntLiteralToken token)
        {
            intLiteral = token;
            Next();
            return true;
        }

        intLiteral = null;
        return false;
    }

    /// <summary>Consumes the current token and returns it as a string literal.</summary>
    /// <exception cref="CompileException">The token is not a string literal, or input has ended.</exception>
    private StringLiteralToken ExpectStringLiteral()
    {
        var token = ExpectToken();
        if (token is not StringLiteralToken stringLiteral)
        {
            throw new CompileException(Diagnostic
                .Error($"Expected string literal, but found {token.GetType().Name}")
                .WithHelp("Provide a valid string literal")
                .At(token)
                .Build());
        }

        return stringLiteral;
    }

    /// <summary>Advances the cursor by one token.</summary>
    private void Next()
    {
        _tokenIndex++;
    }

    /// <summary>
    /// Returns a copy of the tokens from <paramref name="tokenStartIndex"/> (inclusive) up to
    /// the current cursor position (exclusive).
    /// </summary>
    private List<Token> GetTokens(int tokenStartIndex)
    {
        return _tokens.Skip(tokenStartIndex).Take(_tokenIndex - tokenStartIndex).ToList();
    }
}

/// <summary>The result of parsing: the top-level nodes in the order they were parsed.</summary>
public record SyntaxTree(List<TopLevelSyntaxNode> TopLevelSyntaxNodes);