using System.Diagnostics.CodeAnalysis;
using NubLang.Common;
using NubLang.Diagnostics;
using NubLang.Parsing.Syntax;
using NubLang.Tokenization;

namespace NubLang.Parsing;

/// <summary>
/// Recursive-descent parser that turns a stream of tokens into a <see cref="SyntaxTree"/>.
/// Syntax errors are raised internally as <see cref="ParseException"/>, recorded as diagnostics,
/// and parsing resumes at the next recoverable position.
/// </summary>
public sealed class Parser
{
    // The token stream of the parse currently in progress; assigned at the start of Parse.
    private IEnumerator<Token> _tokenEnumerator = null!;
    private readonly List<Diagnostic> _diagnostics = [];

    // One-token lookahead: the token that the next Expect*/TryExpect* call will examine.
    private Token? _currentToken;
    private bool _hasCurrentToken;

    /// <summary>
    /// Returns the diagnostics collected so far. The list is cleared at the start of every
    /// <see cref="Parse"/> call and is the parser's live list, not a snapshot.
    /// </summary>
    public IReadOnlyList<Diagnostic> GetDiagnostics()
    {
        return _diagnostics;
    }

    /// <summary>
    /// Parses all top-level definitions found in <paramref name="tokens"/>.
    /// </summary>
    /// <param name="tokens">The tokens to parse.</param>
    /// <returns>A syntax tree containing every definition that parsed successfully.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="tokens"/> is null.</exception>
    public SyntaxTree Parse(IEnumerable<Token> tokens)
    {
        ArgumentNullException.ThrowIfNull(tokens);

        _diagnostics.Clear();
        _tokenEnumerator = tokens.GetEnumerator();

        try
        {
            // Prime the one-token lookahead.
            Next();

            var definitions = new List<DefinitionSyntax>();
            while (_hasCurrentToken)
            {
                try
                {
                    definitions.Add(ParseDefinition());
                }
                catch (ParseException ex)
                {
                    _diagnostics.Add(ex.Diagnostic);
                    SkipToNextDefinition();
                }
            }

            return new SyntaxTree(definitions);
        }
        finally
        {
            // The enumerator is only needed while parsing; release it even if parsing throws.
            _tokenEnumerator.Dispose();
        }
    }

    /// <summary>
    /// Parses a single top-level definition, dispatching on its leading keyword.
    /// </summary>
    private DefinitionSyntax ParseDefinition()
    {
        var keyword = ExpectSymbol();
        return keyword.Symbol switch
        {
            Symbol.Extern => ParseExtern(),
            Symbol.Func => ParseFunc(),
            Symbol.Struct => ParseStruct(),
            Symbol.Interface => ParseInterface(),
            _ => throw new ParseException(Diagnostic
                .Error($"Expected 'extern', 'func', 'struct' or 'interface' but found '{keyword.Symbol}'")
                .WithHelp("Valid definition keywords are 'extern', 'func', 'struct' and 'interface'")
                .Build())
        };
    }

    /// <summary>
    /// Error recovery: discards tokens until the lookahead is a definition keyword or the input ends.
    /// </summary>
    private void SkipToNextDefinition()
    {
        while (_hasCurrentToken
               && _currentToken is not SymbolToken { Symbol: Symbol.Extern or Symbol.Func or Symbol.Struct or Symbol.Interface })
        {
            Next();
        }
    }

    /// <summary>
    /// Parses a parenthesized parameter list followed by an optional ': returnType'.
    /// A missing return type yields <see cref="VoidTypeSyntax"/>.
    /// </summary>
    /// <param name="thisArg">When not null, inserted as the first parameter (used for struct functions).</param>
    private FuncSignatureSyntax ParseFuncSignature(FuncParameterSyntax? thisArg = null)
    {
        List<FuncParameterSyntax> parameters = [];

        if (thisArg != null)
        {
            parameters.Add(thisArg);
        }

        ExpectSymbol(Symbol.OpenParen);

        while (!TryExpectSymbol(Symbol.CloseParen))
        {
            parameters.Add(ParseFuncParameter());

            // A missing separator is tolerated, but reported as a warning.
            if (!TryExpectSymbol(Symbol.Comma) && _currentToken is not SymbolToken { Symbol: Symbol.CloseParen })
            {
                _diagnostics.Add(Diagnostic
                    .Warning("Missing comma between function parameters")
                    .WithHelp("Add a ',' to separate parameters")
                    .Build());
            }
        }

        var returnType = TryExpectSymbol(Symbol.Colon) ? ParseType() : new VoidTypeSyntax();

        return new FuncSignatureSyntax(parameters, returnType);
    }

    /// <summary>Parses a single 'name: type' parameter.</summary>
    private FuncParameterSyntax ParseFuncParameter()
    {
        var name = ExpectIdentifier();
        ExpectSymbol(Symbol.Colon);
        var type = ParseType();

        return new FuncParameterSyntax(name.Value, type);
    }

    /// <summary>
    /// Parses the remainder of an 'extern' definition (the 'extern' keyword is already consumed).
    /// Only 'extern func' is accepted.
    /// </summary>
    private DefinitionSyntax ParseExtern()
    {
        var keyword = ExpectSymbol();
        return keyword.Symbol switch
        {
            Symbol.Func => ParseExternFunc(),
            _ => throw new ParseException(Diagnostic.Error($"Unexpected symbol {keyword.Symbol} after extern declaration").Build())
        };
    }

    /// <summary>
    /// Parses an extern function: a name, an optional 'calls' clause naming the call target,
    /// and a signature. Without a 'calls' clause the call name equals the function name.
    /// </summary>
    private ExternFuncSyntax ParseExternFunc()
    {
        var name = ExpectIdentifier();

        var callName = name.Value;
        if (TryExpectSymbol(Symbol.Calls))
        {
            callName = ExpectIdentifier().Value;
        }

        var signature = ParseFuncSignature();

        return new ExternFuncSyntax(name.Value, callName, signature);
    }

    /// <summary>Parses a function definition (the 'func' keyword is already consumed).</summary>
    private LocalFuncSyntax ParseFunc()
    {
        var name = ExpectIdentifier();
        var signature = ParseFuncSignature();
        var body = ParseBlock();

        return new LocalFuncSyntax(name.Value, signature, body);
    }

    /// <summary>
    /// Parses a struct definition (the 'struct' keyword is already consumed). The body holds
    /// fields ('name: type' with an optional '= value') and functions, which receive an
    /// implicit leading 'this' parameter of the struct's type.
    /// </summary>
    private DefinitionSyntax ParseStruct()
    {
        var name = ExpectIdentifier();

        ExpectSymbol(Symbol.OpenBrace);

        List<StructFieldSyntax> fields = [];
        List<StructFuncSyntax> funcs = [];

        // Fields are numbered in declaration order; functions do not consume an index.
        var fieldIndex = 0;

        while (!TryExpectSymbol(Symbol.CloseBrace))
        {
            if (TryExpectSymbol(Symbol.Func))
            {
                var funcName = ExpectIdentifier().Value;
                var thisArg = new FuncParameterSyntax("this", new CustomTypeSyntax(name.Value));
                var funcSignature = ParseFuncSignature(thisArg);
                var funcBody = ParseBlock();

                funcs.Add(new StructFuncSyntax(funcName, funcSignature, funcBody));
            }
            else
            {
                var fieldName = ExpectIdentifier().Value;
                ExpectSymbol(Symbol.Colon);
                var fieldType = ParseType();

                var fieldValue = Optional<ExpressionSyntax>.Empty();
                if (TryExpectSymbol(Symbol.Assign))
                {
                    fieldValue = ParseExpression();
                }

                fields.Add(new StructFieldSyntax(fieldIndex++, fieldName, fieldType, fieldValue));
            }
        }

        return new StructSyntax(name.Value, fields, funcs);
    }

    /// <summary>
    /// Parses an interface definition (the 'interface' keyword is already consumed):
    /// a name and a brace-delimited list of 'func name(signature)' declarations without bodies.
    /// </summary>
    private InterfaceSyntax ParseInterface()
    {
        var name = ExpectIdentifier();

        ExpectSymbol(Symbol.OpenBrace);

        List<InterfaceFuncSyntax> functions = [];

        while (!TryExpectSymbol(Symbol.CloseBrace))
        {
            ExpectSymbol(Symbol.Func);

            var funcName = ExpectIdentifier().Value;
            var signature = ParseFuncSignature();

            functions.Add(new InterfaceFuncSyntax(funcName, signature));
        }

        return new InterfaceSyntax(name.Value, functions);
    }

    /// <summary>
    /// Parses one statement. Keyword statements are selected by peeking at the lookahead
    /// (each sub-parser consumes its own keyword); anything else is an expression statement.
    /// </summary>
    private StatementSyntax ParseStatement()
    {
        return _currentToken switch
        {
            SymbolToken { Symbol: Symbol.Return } => ParseReturn(),
            SymbolToken { Symbol: Symbol.If } => ParseIf(),
            SymbolToken { Symbol: Symbol.While } => ParseWhile(),
            SymbolToken { Symbol: Symbol.Let } => ParseVariableDeclaration(),
            SymbolToken { Symbol: Symbol.Break } => ParseBreak(),
            SymbolToken { Symbol: Symbol.Continue } => ParseContinue(),
            _ => ParseStatementExpression()
        };
    }

    /// <summary>
    /// Parses an expression statement, or an assignment when the expression is followed by '='.
    /// </summary>
    private StatementSyntax ParseStatementExpression()
    {
        var expr = ParseExpression();

        if (TryExpectSymbol(Symbol.Assign))
        {
            var value = ParseExpression();
            return new AssignmentSyntax(expr, value);
        }

        return new StatementExpressionSyntax(expr);
    }

    /// <summary>Parses 'let name', with an optional ': type' and an optional '= value'.</summary>
    private VariableDeclarationSyntax ParseVariableDeclaration()
    {
        ExpectSymbol(Symbol.Let);
        var name = ExpectIdentifier().Value;

        var explicitType = Optional<TypeSyntax>.Empty();
        if (TryExpectSymbol(Symbol.Colon))
        {
            explicitType = ParseType();
        }

        var assignment = Optional<ExpressionSyntax>.Empty();
        if (TryExpectSymbol(Symbol.Assign))
        {
            assignment = ParseExpression();
        }

        return new VariableDeclarationSyntax(name, explicitType, assignment);
    }

    /// <summary>Parses a 'break' statement.</summary>
    private StatementSyntax ParseBreak()
    {
        ExpectSymbol(Symbol.Break);
        return new BreakSyntax();
    }

    /// <summary>Parses a 'continue' statement.</summary>
    private StatementSyntax ParseContinue()
    {
        ExpectSymbol(Symbol.Continue);
        return new ContinueSyntax();
    }

    /// <summary>
    /// Parses a 'return' statement. A ';' directly after the keyword means "no value";
    /// otherwise the following expression is the return value.
    /// </summary>
    private ReturnSyntax ParseReturn()
    {
        ExpectSymbol(Symbol.Return);

        var value = Optional<ExpressionSyntax>.Empty();
        if (!TryExpectSymbol(Symbol.Semi))
        {
            value = ParseExpression();
        }

        return new ReturnSyntax(value);
    }

    /// <summary>
    /// Parses 'if condition { ... }' with an optional 'else { ... }' or 'else if ...' chain.
    /// </summary>
    private IfSyntax ParseIf()
    {
        ExpectSymbol(Symbol.If);
        var condition = ParseExpression();
        var body = ParseBlock();

        var elseStatement = Optional<Variant<IfSyntax, BlockSyntax>>.Empty();
        if (TryExpectSymbol(Symbol.Else))
        {
            // Peek rather than consume: the recursive ParseIf call consumes the 'if' keyword itself.
            elseStatement = _currentToken is SymbolToken { Symbol: Symbol.If }
                ? (Variant<IfSyntax, BlockSyntax>)ParseIf()
                : (Variant<IfSyntax, BlockSyntax>)ParseBlock();
        }

        return new IfSyntax(condition, body, elseStatement);
    }

    /// <summary>Parses 'while condition { ... }'.</summary>
    private WhileSyntax ParseWhile()
    {
        ExpectSymbol(Symbol.While);
        var condition = ParseExpression();
        var body = ParseBlock();

        return new WhileSyntax(condition, body);
    }

    /// <summary>
    /// Parses a binary expression by precedence climbing.
    /// </summary>
    /// <param name="precedence">The minimum operator precedence this call is allowed to consume.</param>
    private ExpressionSyntax ParseExpression(int precedence = 0)
    {
        var left = ParsePrimaryExpression();

        while (_currentToken is SymbolToken symbolToken
               && TryGetBinaryOperator(symbolToken.Symbol, out var op)
               && GetBinaryOperatorPrecedence(op.Value) >= precedence)
        {
            Next();

            // Requiring a strictly higher precedence on the right makes operators left-associative.
            var right = ParseExpression(GetBinaryOperatorPrecedence(op.Value) + 1);
            left = new BinaryExpressionSyntax(left, op.Value, right);
        }

        return left;
    }

    /// <summary>
    /// Returns the binding strength of a binary operator; higher values bind tighter.
    /// </summary>
    /// <exception cref="ArgumentOutOfRangeException">The operator has no precedence assigned.</exception>
    private static int GetBinaryOperatorPrecedence(BinaryOperatorSyntax operatorSyntax)
    {
        return operatorSyntax switch
        {
            BinaryOperatorSyntax.Multiply => 3,
            BinaryOperatorSyntax.Divide => 3,
            BinaryOperatorSyntax.Plus => 2,
            BinaryOperatorSyntax.Minus => 2,
            BinaryOperatorSyntax.GreaterThan => 1,
            BinaryOperatorSyntax.GreaterThanOrEqual => 1,
            BinaryOperatorSyntax.LessThan => 1,
            BinaryOperatorSyntax.LessThanOrEqual => 1,
            BinaryOperatorSyntax.Equal => 0,
            BinaryOperatorSyntax.NotEqual => 0,
            _ => throw new ArgumentOutOfRangeException(nameof(operatorSyntax), operatorSyntax, null)
        };
    }

    /// <summary>
    /// Maps a symbol to the binary operator it denotes.
    /// </summary>
    /// <returns>True when <paramref name="symbol"/> is a binary operator; otherwise false.</returns>
    private static bool TryGetBinaryOperator(Symbol symbol, [NotNullWhen(true)] out BinaryOperatorSyntax? binaryExpressionOperator)
    {
        binaryExpressionOperator = symbol switch
        {
            Symbol.Equal => BinaryOperatorSyntax.Equal,
            Symbol.NotEqual => BinaryOperatorSyntax.NotEqual,
            Symbol.LessThan => BinaryOperatorSyntax.LessThan,
            Symbol.LessThanOrEqual => BinaryOperatorSyntax.LessThanOrEqual,
            Symbol.GreaterThan => BinaryOperatorSyntax.GreaterThan,
            Symbol.GreaterThanOrEqual => BinaryOperatorSyntax.GreaterThanOrEqual,
            Symbol.Plus => BinaryOperatorSyntax.Plus,
            Symbol.Minus => BinaryOperatorSyntax.Minus,
            Symbol.Star => BinaryOperatorSyntax.Multiply,
            Symbol.ForwardSlash => BinaryOperatorSyntax.Divide,
            _ => null
        };

        return binaryExpressionOperator is not null;
    }

    /// <summary>
    /// Parses a primary expression (literal, identifier, arrow function, parenthesized expression,
    /// unary '-' / '!', array initializer or struct initializer) followed by any postfix operators.
    /// </summary>
    private ExpressionSyntax ParsePrimaryExpression()
    {
        var token = ExpectToken();
        var expr = token switch
        {
            LiteralToken literal => new LiteralSyntax(literal.Value, literal.Kind),
            IdentifierToken identifier => new IdentifierSyntax(identifier.Value),
            SymbolToken symbolToken => symbolToken.Symbol switch
            {
                Symbol.Func => ParseArrowFunction(),
                Symbol.OpenParen => ParseParenthesizedExpression(),
                Symbol.Minus => new UnaryExpressionSyntax(UnaryOperatorSyntax.Negate, ParsePrimaryExpression()),
                Symbol.Bang => new UnaryExpressionSyntax(UnaryOperatorSyntax.Invert, ParsePrimaryExpression()),
                Symbol.OpenBracket => ParseArrayInitializer(),
                Symbol.Alloc => ParseStructInitializer(),
                _ => throw new ParseException(Diagnostic
                    .Error($"Unexpected symbol '{symbolToken.Symbol}' in expression")
                    .WithHelp("Expected literal, identifier, or '(' to start expression")
                    .Build())
            },
            _ => throw new ParseException(Diagnostic
                .Error($"Unexpected token '{token.GetType().Name}' in expression")
                .WithHelp("Expected literal, identifier, or parenthesized expression")
                .Build())
        };

        return ParsePostfixOperators(expr);
    }

    /// <summary>
    /// Parses an arrow function (the 'func' keyword is already consumed): a parenthesized list of
    /// parameter names, '=>', and either a block body or a single expression, which is wrapped in
    /// an implicit return.
    /// </summary>
    private ExpressionSyntax ParseArrowFunction()
    {
        List<ArrowFuncParameterSyntax> parameters = [];

        ExpectSymbol(Symbol.OpenParen);
        while (!TryExpectSymbol(Symbol.CloseParen))
        {
            var name = ExpectIdentifier();
            parameters.Add(new ArrowFuncParameterSyntax(name.Value));

            // Accept an optional separating comma, as the other parameter lists do.
            TryExpectSymbol(Symbol.Comma);
        }

        ExpectSymbol(Symbol.Arrow);

        BlockSyntax body;
        if (_currentToken is SymbolToken { Symbol: Symbol.OpenBrace })
        {
            // '{' starts a block body.
            body = ParseBlock();
        }
        else
        {
            // Expression body: equivalent to a block containing a single return.
            var returnValue = ParseExpression();
            var arrowExpression = new ReturnSyntax(returnValue);
            body = new BlockSyntax([arrowExpression]);
        }

        return new ArrowFuncSyntax(parameters, body);
    }

    /// <summary>Parses the remainder of '( expression )' (the '(' is already consumed).</summary>
    private ExpressionSyntax ParseParenthesizedExpression()
    {
        var expression = ParseExpression();
        ExpectSymbol(Symbol.CloseParen);
        return expression;
    }

    /// <summary>Parses the remainder of '[capacity]type' (the '[' is already consumed).</summary>
    private ExpressionSyntax ParseArrayInitializer()
    {
        var capacity = ParseExpression();
        ExpectSymbol(Symbol.CloseBracket);
        var type = ParseType();

        return new ArrayInitializerSyntax(capacity, type);
    }

    /// <summary>
    /// Parses the remainder of a struct initializer (the 'alloc' keyword is already consumed):
    /// a type followed by a brace-delimited list of 'field = value' pairs. Initializing the same
    /// field twice is reported as an error and the first value is kept.
    /// </summary>
    private ExpressionSyntax ParseStructInitializer()
    {
        var type = ParseType();
        Dictionary<string, ExpressionSyntax> initializers = [];

        ExpectSymbol(Symbol.OpenBrace);
        while (!TryExpectSymbol(Symbol.CloseBrace))
        {
            var name = ExpectIdentifier().Value;
            ExpectSymbol(Symbol.Assign);
            var value = ParseExpression();

            // Dictionary.Add would throw ArgumentException on a duplicate key; report it instead.
            if (!initializers.TryAdd(name, value))
            {
                _diagnostics.Add(Diagnostic
                    .Error($"Field '{name}' is initialized more than once")
                    .WithHelp("Remove the duplicate field initializer")
                    .Build());
            }
        }

        return new StructInitializerSyntax(type, initializers);
    }

    /// <summary>
    /// Wraps <paramref name="expr"/> in every postfix operator that follows it: address-of ('&amp;'),
    /// dereference ('^'), member access ('.name'), index access ('[index]') and call ('(args)').
    /// </summary>
    private ExpressionSyntax ParsePostfixOperators(ExpressionSyntax expr)
    {
        while (_hasCurrentToken)
        {
            if (TryExpectSymbol(Symbol.Ampersand))
            {
                expr = new AddressOfSyntax(expr);
                continue;
            }

            if (TryExpectSymbol(Symbol.Caret))
            {
                expr = new DereferenceSyntax(expr);
                continue;
            }

            if (TryExpectSymbol(Symbol.Period))
            {
                var structMember = ExpectIdentifier().Value;
                expr = new MemberAccessSyntax(expr, structMember);
                continue;
            }

            if (TryExpectSymbol(Symbol.OpenBracket))
            {
                var index = ParseExpression();
                ExpectSymbol(Symbol.CloseBracket);
                expr = new ArrayIndexAccessSyntax(expr, index);
                continue;
            }

            if (TryExpectSymbol(Symbol.OpenParen))
            {
                var parameters = new List<ExpressionSyntax>();
                while (!TryExpectSymbol(Symbol.CloseParen))
                {
                    parameters.Add(ParseExpression());

                    // A missing separator is tolerated, but reported as a warning.
                    if (!TryExpectSymbol(Symbol.Comma) && _currentToken is not SymbolToken { Symbol: Symbol.CloseParen })
                    {
                        _diagnostics.Add(Diagnostic
                            .Warning("Missing comma between function arguments")
                            .WithHelp("Add a ',' to separate arguments")
                            .Build());
                    }
                }

                expr = new FuncCallSyntax(expr, parameters);
                continue;
            }

            break;
        }

        return expr;
    }

    /// <summary>
    /// Parses a brace-delimited list of statements. A statement that fails to parse is recorded
    /// as a diagnostic and parsing continues after skipping one token.
    /// </summary>
    /// <exception cref="ParseException">The input ends before the closing '}'.</exception>
    private BlockSyntax ParseBlock()
    {
        ExpectSymbol(Symbol.OpenBrace);
        List<StatementSyntax> statements = [];

        while (!TryExpectSymbol(Symbol.CloseBrace))
        {
            // Without this check an unterminated block would loop forever at end of input.
            if (!_hasCurrentToken)
            {
                throw new ParseException(Diagnostic
                    .Error("Unexpected end of file")
                    .WithHelp("Expected '}' to close the block")
                    .Build());
            }

            try
            {
                statements.Add(ParseStatement());
            }
            // Recover only while tokens remain; once the input is exhausted, let the error propagate.
            catch (ParseException ex) when (_hasCurrentToken)
            {
                _diagnostics.Add(ex.Diagnostic);
                Next();
            }
        }

        return new BlockSyntax(statements);
    }

    /// <summary>
    /// Parses a type: a built-in or custom type name, a pointer ('^type'), a function type
    /// ('func(types): returnType') or an array ('[]type').
    /// </summary>
    /// <exception cref="ParseException">The lookahead does not start a type.</exception>
    private TypeSyntax ParseType()
    {
        if (TryExpectIdentifier(out var name))
        {
            return name.Value switch
            {
                "void" => new VoidTypeSyntax(),
                "string" => new StringTypeSyntax(),
                "cstring" => new CStringTypeSyntax(),
                "i64" => new PrimitiveTypeSyntax(PrimitiveTypeSyntaxKind.I64),
                "i32" => new PrimitiveTypeSyntax(PrimitiveTypeSyntaxKind.I32),
                "i16" => new PrimitiveTypeSyntax(PrimitiveTypeSyntaxKind.I16),
                "i8" => new PrimitiveTypeSyntax(PrimitiveTypeSyntaxKind.I8),
                "u64" => new PrimitiveTypeSyntax(PrimitiveTypeSyntaxKind.U64),
                "u32" => new PrimitiveTypeSyntax(PrimitiveTypeSyntaxKind.U32),
                "u16" => new PrimitiveTypeSyntax(PrimitiveTypeSyntaxKind.U16),
                "u8" => new PrimitiveTypeSyntax(PrimitiveTypeSyntaxKind.U8),
                "f64" => new PrimitiveTypeSyntax(PrimitiveTypeSyntaxKind.F64),
                "f32" => new PrimitiveTypeSyntax(PrimitiveTypeSyntaxKind.F32),
                "bool" => new PrimitiveTypeSyntax(PrimitiveTypeSyntaxKind.Bool),
                _ => new CustomTypeSyntax(name.Value)
            };
        }

        if (TryExpectSymbol(Symbol.Caret))
        {
            var baseType = ParseType();
            return new PointerTypeSyntax(baseType);
        }

        if (TryExpectSymbol(Symbol.Func))
        {
            ExpectSymbol(Symbol.OpenParen);

            List<TypeSyntax> parameters = [];
            while (!TryExpectSymbol(Symbol.CloseParen))
            {
                var parameter = ParseType();
                parameters.Add(parameter);

                // A missing separator is tolerated, but reported as a warning.
                if (!TryExpectSymbol(Symbol.Comma) && _currentToken is not SymbolToken { Symbol: Symbol.CloseParen })
                {
                    _diagnostics.Add(Diagnostic
                        .Warning("Missing comma between func type arguments")
                        .WithHelp("Add a ',' to separate arguments")
                        .Build());
                }
            }

            var returnType = TryExpectSymbol(Symbol.Colon) ? ParseType() : new VoidTypeSyntax();

            return new FuncTypeSyntax(parameters, returnType);
        }

        if (TryExpectSymbol(Symbol.OpenBracket))
        {
            ExpectSymbol(Symbol.CloseBracket);
            var baseType = ParseType();
            return new ArrayTypeSyntax(baseType);
        }

        throw new ParseException(Diagnostic
            .Error("Invalid type syntax")
            .WithHelp("Expected type name, '^' for pointer, or '[]' for array")
            .Build());
    }

    /// <summary>Consumes and returns the current token.</summary>
    /// <exception cref="ParseException">There are no tokens left.</exception>
    private Token ExpectToken()
    {
        if (!_hasCurrentToken)
        {
            throw new ParseException(Diagnostic
                .Error("Unexpected end of file")
                .WithHelp("Expected more tokens to complete the syntax")
                .Build());
        }

        var token = _currentToken!;
        Next();
        return token;
    }

    /// <summary>Consumes the current token and returns it as a symbol.</summary>
    /// <exception cref="ParseException">The input has ended or the token is not a symbol.</exception>
    private SymbolToken ExpectSymbol()
    {
        var token = ExpectToken();
        if (token is not SymbolToken symbol)
        {
            throw new ParseException(Diagnostic
                .Error($"Expected symbol, but found {token.GetType().Name}")
                .WithHelp("This position requires a symbol like '(', ')', '{', '}', etc.")
                .Build());
        }

        return symbol;
    }

    /// <summary>Consumes the current token and requires it to be <paramref name="expectedSymbol"/>.</summary>
    /// <exception cref="ParseException">The input has ended or the token is a different one.</exception>
    private void ExpectSymbol(Symbol expectedSymbol)
    {
        var token = ExpectSymbol();
        if (token.Symbol != expectedSymbol)
        {
            throw new ParseException(Diagnostic
                .Error($"Expected '{expectedSymbol}', but found '{token.Symbol}'")
                .WithHelp($"Insert '{expectedSymbol}' here")
                .Build());
        }
    }

    /// <summary>
    /// Consumes the current token only if it is the given symbol.
    /// </summary>
    /// <returns>True when the symbol was consumed; false when nothing was consumed.</returns>
    private bool TryExpectSymbol(Symbol symbol)
    {
        if (_currentToken is SymbolToken symbolToken && symbolToken.Symbol == symbol)
        {
            Next();
            return true;
        }

        return false;
    }

    /// <summary>
    /// Consumes the current token only if it is an identifier.
    /// </summary>
    /// <returns>True when an identifier was consumed; false when nothing was consumed.</returns>
    private bool TryExpectIdentifier([NotNullWhen(true)] out IdentifierToken? identifier)
    {
        if (_currentToken is IdentifierToken identifierToken)
        {
            identifier = identifierToken;
            Next();
            return true;
        }

        identifier = null;
        return false;
    }

    /// <summary>Consumes the current token and returns it as an identifier.</summary>
    /// <exception cref="ParseException">The input has ended or the token is not an identifier.</exception>
    private IdentifierToken ExpectIdentifier()
    {
        var token = ExpectToken();
        if (token is not IdentifierToken identifier)
        {
            throw new ParseException(Diagnostic
                .Error($"Expected identifier, but found {token.GetType().Name}")
                .WithHelp("Provide a valid identifier name here")
                .Build());
        }

        return identifier;
    }

    /// <summary>Advances the lookahead to the next token, or clears it when the input is exhausted.</summary>
    private void Next()
    {
        _hasCurrentToken = _tokenEnumerator.MoveNext();
        _currentToken = _hasCurrentToken ? _tokenEnumerator.Current : null;
    }
}

/// <summary>
/// Thrown when the parser encounters a syntax error; carries the diagnostic describing it.
/// </summary>
public class ParseException : Exception
{
    /// <summary>The diagnostic describing the syntax error.</summary>
    public Diagnostic Diagnostic { get; }

    /// <summary>
    /// Creates the exception from <paramref name="diagnostic"/>, using its message as the exception message.
    /// </summary>
    public ParseException(Diagnostic diagnostic) : base(diagnostic.Message)
    {
        Diagnostic = diagnostic;
    }
}