using System.Diagnostics.CodeAnalysis;
using NubLang.Diagnostics;
using NubLang.Parsing.Syntax;
using NubLang.Tokenization;

namespace NubLang.Parsing;

/// <summary>
/// Recursive-descent parser that turns a list of tokens into a <see cref="SyntaxTree"/>.
/// Syntax errors are recorded as diagnostics and the parser tries to resynchronise
/// instead of aborting on the first error.
/// </summary>
public sealed class Parser
{
    private readonly List<Diagnostic> _diagnostics = [];
    private IReadOnlyList<Token> _tokens = [];
    private int _tokenIndex;
    private string _moduleName = string.Empty;

    /// <summary>The token at the current position, or <c>null</c> once the position is past the last token.</summary>
    private Token? CurrentToken => _tokenIndex < _tokens.Count ? _tokens[_tokenIndex] : null;

    /// <summary><c>true</c> while there is still a token at the current position.</summary>
    private bool HasToken => CurrentToken != null;

    /// <summary>
    /// Returns the diagnostics collected so far. The list is cleared at the start of every
    /// <see cref="Parse"/> call, so it reflects the most recent parse.
    /// </summary>
    public IReadOnlyList<Diagnostic> GetDiagnostics()
    {
        return _diagnostics;
    }

    /// <summary>
    /// Parses <paramref name="tokens"/> into a syntax tree: first the module metadata
    /// (module name and imports), then every top-level definition.
    /// </summary>
    /// <param name="tokens">The tokens of a single source file, in source order.</param>
    /// <returns>The syntax tree built from the definitions and metadata that could be parsed.</returns>
    public SyntaxTree Parse(IReadOnlyList<Token> tokens)
    {
        // Reset all per-parse state so the same instance can be reused.
        _diagnostics.Clear();
        _tokens = tokens;
        _tokenIndex = 0;
        _moduleName = string.Empty;

        var metadata = ParseMetadata();
        var definitions = ParseDefinitions();

        return new SyntaxTree(definitions, metadata);
    }

    /// <summary>
    /// Parses the file header: the 'module' symbol followed by a string literal, then any number of
    /// 'import' symbols each followed by an identifier.
    /// </summary>
    private SyntaxTreeMetadata ParseMetadata()
    {
        var imports = new List<string>();

        try
        {
            ExpectSymbol(Symbol.Module);
            _moduleName = ExpectLiteral(LiteralKind.String).Value;

            while (TryExpectSymbol(Symbol.Import))
            {
                imports.Add(ExpectIdentifier().Value);
            }
        }
        catch (ParseException e)
        {
            _diagnostics.Add(e.Diagnostic);

            // Recovery: skip ahead until the next 'module' or 'import' symbol (or the end of input).
            while (HasToken)
            {
                if (CurrentToken is SymbolToken { Symbol: Symbol.Module or Symbol.Import })
                {
                    break;
                }

                Next();
            }
        }

        return new SyntaxTreeMetadata(_moduleName, imports);
    }

    /// <summary>
    /// Parses top-level definitions until the input is exhausted. A definition is an optional
    /// 'export', then either 'extern' + string literal + 'func', or one of 'func', 'struct', 'interface'.
    /// </summary>
    private List<DefinitionSyntax> ParseDefinitions()
    {
        var definitions = new List<DefinitionSyntax>();

        while (HasToken)
        {
            try
            {
                var startIndex = _tokenIndex;
                var exported = TryExpectSymbol(Symbol.Export);

                if (TryExpectSymbol(Symbol.Extern))
                {
                    var externSymbol = ExpectLiteral(LiteralKind.String).Value;
                    ExpectSymbol(Symbol.Func);
                    definitions.Add(ParseFunc(startIndex, exported, externSymbol));
                    continue;
                }

                var keyword = ExpectSymbol();
                DefinitionSyntax definition = keyword.Symbol switch
                {
                    Symbol.Func => ParseFunc(startIndex, exported, null),
                    Symbol.Struct => ParseStruct(startIndex, exported),
                    Symbol.Interface => ParseInterface(startIndex, exported),
                    _ => throw new ParseException(Diagnostic
                        .Error($"Expected 'func', 'struct' or 'interface' but found '{keyword.Symbol}'")
                        .WithHelp("Valid definition keywords are 'func', 'struct' and 'interface'")
                        .At(keyword)
                        .Build())
                };

                definitions.Add(definition);
            }
            catch (ParseException e)
            {
                _diagnostics.Add(e.Diagnostic);

                // Recovery: skip to the next token that can start a definition. 'export' is a
                // synchronisation point too, otherwise the definition that follows an error
                // would silently lose its exported flag.
                while (HasToken)
                {
                    if (CurrentToken is SymbolToken { Symbol: Symbol.Export or Symbol.Extern or Symbol.Func or Symbol.Struct or Symbol.Interface })
                    {
                        break;
                    }

                    Next();
                }
            }
        }

        return definitions;
    }

    /// <summary>
    /// Parses '(' parameters ')' with comma-separated parameters, optionally followed by ':' and a
    /// return type. Without a return type a <see cref="VoidTypeSyntax"/> with no tokens is used.
    /// </summary>
    private FuncSignatureSyntax ParseFuncSignature()
    {
        var startIndex = _tokenIndex;
        List<FuncParameterSyntax> parameters = [];

        ExpectSymbol(Symbol.OpenParen);

        while (!TryExpectSymbol(Symbol.CloseParen))
        {
            parameters.Add(ParseFuncParameter());

            // No comma after a parameter means the list must end here.
            if (!TryExpectSymbol(Symbol.Comma))
            {
                ExpectSymbol(Symbol.CloseParen);
                break;
            }
        }

        var returnType = TryExpectSymbol(Symbol.Colon) ? ParseType() : new VoidTypeSyntax([]);

        return new FuncSignatureSyntax(GetTokens(startIndex), parameters, returnType);
    }

    /// <summary>Parses a single parameter of the form: identifier ':' type.</summary>
    private FuncParameterSyntax ParseFuncParameter()
    {
        var startIndex = _tokenIndex;
        var name = ExpectIdentifier();
        ExpectSymbol(Symbol.Colon);
        var type = ParseType();

        return new FuncParameterSyntax(GetTokens(startIndex), name.Value, type);
    }

    /// <summary>
    /// Parses a function after its 'func' symbol: name, signature and an optional body.
    /// The body is only parsed when the next token is '{'; otherwise it stays <c>null</c>.
    /// </summary>
    /// <param name="startIndex">Index of the first token of the definition (including modifiers).</param>
    /// <param name="exported">Whether the definition was preceded by 'export'.</param>
    /// <param name="externSymbol">The string literal following 'extern', or <c>null</c>.</param>
    private FuncSyntax ParseFunc(int startIndex, bool exported, string? externSymbol)
    {
        var name = ExpectIdentifier();
        var signature = ParseFuncSignature();

        BlockSyntax? body = null;
        if (CurrentToken is SymbolToken { Symbol: Symbol.OpenBrace })
        {
            // ParseBlock starts after the opening brace.
            Next();
            body = ParseBlock();
        }

        return new FuncSyntax(GetTokens(startIndex), name.Value, exported, externSymbol, signature, body);
    }

    /// <summary>
    /// Parses a struct after its 'struct' symbol: name, an optional ':' followed by a comma-separated
    /// list of implemented interface types, and a brace-delimited member list of fields and functions.
    /// </summary>
    private DefinitionSyntax ParseStruct(int startIndex, bool exported)
    {
        var name = ExpectIdentifier();

        var interfaceImplementations = new List<TypeSyntax>();
        if (TryExpectSymbol(Symbol.Colon))
        {
            do
            {
                var interfaceType = ParseType();
                interfaceImplementations.Add(interfaceType);
            } while (TryExpectSymbol(Symbol.Comma));
        }

        ExpectSymbol(Symbol.OpenBrace);

        List<StructFieldSyntax> fields = [];
        List<StructFuncSyntax> funcs = [];

        // Fields are numbered in declaration order; functions do not take an index.
        var fieldIndex = 0;

        while (!TryExpectSymbol(Symbol.CloseBrace))
        {
            var memberStartIndex = _tokenIndex;

            if (TryExpectSymbol(Symbol.Func))
            {
                var funcName = ExpectIdentifier().Value;
                var funcSignature = ParseFuncSignature();

                // ParseBlock starts after the opening brace, so consume it here.
                ExpectSymbol(Symbol.OpenBrace);
                var funcBody = ParseBlock();

                funcs.Add(new StructFuncSyntax(GetTokens(memberStartIndex), funcName, funcSignature, funcBody));
            }
            else
            {
                var fieldName = ExpectIdentifier().Value;
                ExpectSymbol(Symbol.Colon);
                var fieldType = ParseType();

                var fieldValue = Optional<ExpressionSyntax>.Empty();
                if (TryExpectSymbol(Symbol.Assign))
                {
                    fieldValue = ParseExpression();
                }

                fields.Add(new StructFieldSyntax(GetTokens(memberStartIndex), fieldIndex++, fieldName, fieldType, fieldValue));
            }
        }

        return new StructSyntax(GetTokens(startIndex), name.Value, exported, fields, funcs, interfaceImplementations);
    }

    /// <summary>
    /// Parses an interface after its 'interface' symbol: name and a brace-delimited list of
    /// function declarations, each 'func' + name + signature with no body.
    /// </summary>
    private InterfaceSyntax ParseInterface(int startIndex, bool exported)
    {
        var name = ExpectIdentifier();

        ExpectSymbol(Symbol.OpenBrace);

        List<InterfaceFuncSyntax> functions = [];

        while (!TryExpectSymbol(Symbol.CloseBrace))
        {
            var funcStartIndex = _tokenIndex;

            ExpectSymbol(Symbol.Func);
            var funcName = ExpectIdentifier().Value;
            var signature = ParseFuncSignature();

            functions.Add(new InterfaceFuncSyntax(GetTokens(funcStartIndex), funcName, signature));
        }

        return new InterfaceSyntax(GetTokens(startIndex), name.Value, exported, functions);
    }

    /// <summary>
    /// Parses one statement. The current token selects the statement kind; anything that is not
    /// a statement keyword is parsed as an expression statement or assignment.
    /// </summary>
    private StatementSyntax ParseStatement()
    {
        if (CurrentToken is SymbolToken symbol)
        {
            // The keyword is not consumed here; each statement parser expects it itself.
            switch (symbol.Symbol)
            {
                case Symbol.Return:
                    return ParseReturn();
                case Symbol.If:
                    return ParseIf();
                case Symbol.While:
                    return ParseWhile();
                case Symbol.Let:
                    return ParseVariableDeclaration();
                case Symbol.Break:
                    return ParseBreak();
                case Symbol.Continue:
                    return ParseContinue();
            }
        }

        return ParseStatementExpression();
    }

    /// <summary>
    /// Parses an expression; if it is followed by the assign symbol, the result is an assignment
    /// of the following expression to it, otherwise a plain expression statement.
    /// </summary>
    private StatementSyntax ParseStatementExpression()
    {
        var startIndex = _tokenIndex;
        var expr = ParseExpression();

        if (TryExpectSymbol(Symbol.Assign))
        {
            var value = ParseExpression();
            return new AssignmentSyntax(GetTokens(startIndex), expr, value);
        }

        return new StatementExpressionSyntax(GetTokens(startIndex), expr);
    }

    /// <summary>Parses: 'let' identifier, optional ':' type, optional assign symbol + expression.</summary>
    private VariableDeclarationSyntax ParseVariableDeclaration()
    {
        var startIndex = _tokenIndex;

        ExpectSymbol(Symbol.Let);
        var name = ExpectIdentifier().Value;

        var explicitType = Optional<TypeSyntax>.Empty();
        if (TryExpectSymbol(Symbol.Colon))
        {
            explicitType = ParseType();
        }

        var assignment = Optional<ExpressionSyntax>.Empty();
        if (TryExpectSymbol(Symbol.Assign))
        {
            assignment = ParseExpression();
        }

        return new VariableDeclarationSyntax(GetTokens(startIndex), name, explicitType, assignment);
    }

    /// <summary>Parses a 'break' statement.</summary>
    private StatementSyntax ParseBreak()
    {
        var startIndex = _tokenIndex;
        ExpectSymbol(Symbol.Break);
        return new BreakSyntax(GetTokens(startIndex));
    }

    /// <summary>Parses a 'continue' statement.</summary>
    private StatementSyntax ParseContinue()
    {
        var startIndex = _tokenIndex;
        ExpectSymbol(Symbol.Continue);
        return new ContinueSyntax(GetTokens(startIndex));
    }

    /// <summary>
    /// Parses a 'return' statement. A value expression is parsed unless the token directly after
    /// 'return' is <see cref="Symbol.Semi"/>, which is consumed and yields an empty value.
    /// </summary>
    private ReturnSyntax ParseReturn()
    {
        var startIndex = _tokenIndex;
        ExpectSymbol(Symbol.Return);

        var value = Optional<ExpressionSyntax>.Empty();
        if (!TryExpectSymbol(Symbol.Semi))
        {
            value = ParseExpression();
        }

        return new ReturnSyntax(GetTokens(startIndex), value);
    }

    /// <summary>
    /// Parses: 'if' condition '{' block, optionally followed by 'else' and either another
    /// 'if' statement or a '{' block.
    /// </summary>
    private IfSyntax ParseIf()
    {
        var startIndex = _tokenIndex;

        ExpectSymbol(Symbol.If);
        var condition = ParseExpression();

        // ParseBlock starts after the opening brace, so consume it here.
        ExpectSymbol(Symbol.OpenBrace);
        var body = ParseBlock();

        // NOTE(review): the Variant type-argument order (IfSyntax, BlockSyntax) is inferred from the
        // two branches below — confirm against the IfSyntax declaration.
        var elseStatement = Optional<Variant<IfSyntax, BlockSyntax>>.Empty();
        if (TryExpectSymbol(Symbol.Else))
        {
            // Only peek at 'if': the recursive ParseIf call consumes the keyword itself.
            if (CurrentToken is SymbolToken { Symbol: Symbol.If })
            {
                elseStatement = (Variant<IfSyntax, BlockSyntax>)ParseIf();
            }
            else
            {
                ExpectSymbol(Symbol.OpenBrace);
                elseStatement = (Variant<IfSyntax, BlockSyntax>)ParseBlock();
            }
        }

        return new IfSyntax(GetTokens(startIndex), condition, body, elseStatement);
    }

    /// <summary>Parses: 'while' condition '{' block.</summary>
    private WhileSyntax ParseWhile()
    {
        var startIndex = _tokenIndex;

        ExpectSymbol(Symbol.While);
        var condition = ParseExpression();

        // ParseBlock starts after the opening brace, so consume it here.
        ExpectSymbol(Symbol.OpenBrace);
        var body = ParseBlock();

        return new WhileSyntax(GetTokens(startIndex), condition, body);
    }

    /// <summary>
    /// Parses a binary expression. Operators whose precedence is below
    /// <paramref name="precedence"/> are left for the caller.
    /// </summary>
    /// <param name="precedence">Minimum operator precedence this call is allowed to consume.</param>
    private ExpressionSyntax ParseExpression(int precedence = 0)
    {
        var startIndex = _tokenIndex;
        var left = ParsePrimaryExpression();

        while (CurrentToken is SymbolToken symbolToken
               && TryGetBinaryOperator(symbolToken.Symbol, out var op)
               && GetBinaryOperatorPrecedence(op.Value) >= precedence)
        {
            Next();

            // Parsing the right side with precedence + 1 makes equal-precedence operators
            // group to the left.
            var right = ParseExpression(GetBinaryOperatorPrecedence(op.Value) + 1);
            left = new BinaryExpressionSyntax(GetTokens(startIndex), left, op.Value, right);
        }

        return left;
    }

    /// <summary>Returns the binding strength of a binary operator; higher values bind tighter.</summary>
    /// <exception cref="ArgumentOutOfRangeException">The operator has no precedence assigned.</exception>
    private static int GetBinaryOperatorPrecedence(BinaryOperatorSyntax operatorSyntax)
    {
        return operatorSyntax switch
        {
            BinaryOperatorSyntax.Multiply => 10,
            BinaryOperatorSyntax.Divide => 10,
            BinaryOperatorSyntax.Modulo => 10,

            BinaryOperatorSyntax.Plus => 9,
            BinaryOperatorSyntax.Minus => 9,

            BinaryOperatorSyntax.LeftShift => 8,
            BinaryOperatorSyntax.RightShift => 8,

            BinaryOperatorSyntax.GreaterThan => 7,
            BinaryOperatorSyntax.GreaterThanOrEqual => 7,
            BinaryOperatorSyntax.LessThan => 7,
            BinaryOperatorSyntax.LessThanOrEqual => 7,
            BinaryOperatorSyntax.Equal => 7,
            BinaryOperatorSyntax.NotEqual => 7,

            BinaryOperatorSyntax.BitwiseAnd => 6,
            BinaryOperatorSyntax.BitwiseXor => 5,
            BinaryOperatorSyntax.BitwiseOr => 4,

            BinaryOperatorSyntax.LogicalAnd => 3,
            BinaryOperatorSyntax.LogicalOr => 2,

            _ => throw new ArgumentOutOfRangeException(nameof(operatorSyntax), operatorSyntax, null)
        };
    }

    /// <summary>
    /// Maps a symbol to the binary operator it denotes.
    /// </summary>
    /// <returns><c>true</c> and the operator when the symbol is a binary operator; otherwise <c>false</c> and <c>null</c>.</returns>
    private static bool TryGetBinaryOperator(Symbol symbol, [NotNullWhen(true)] out BinaryOperatorSyntax? binaryExpressionOperator)
    {
        binaryExpressionOperator = symbol switch
        {
            Symbol.Equal => BinaryOperatorSyntax.Equal,
            Symbol.NotEqual => BinaryOperatorSyntax.NotEqual,
            Symbol.LessThan => BinaryOperatorSyntax.LessThan,
            Symbol.LessThanOrEqual => BinaryOperatorSyntax.LessThanOrEqual,
            Symbol.GreaterThan => BinaryOperatorSyntax.GreaterThan,
            Symbol.GreaterThanOrEqual => BinaryOperatorSyntax.GreaterThanOrEqual,
            Symbol.And => BinaryOperatorSyntax.LogicalAnd,
            Symbol.Or => BinaryOperatorSyntax.LogicalOr,
            Symbol.Plus => BinaryOperatorSyntax.Plus,
            Symbol.Minus => BinaryOperatorSyntax.Minus,
            Symbol.Star => BinaryOperatorSyntax.Multiply,
            Symbol.ForwardSlash => BinaryOperatorSyntax.Divide,
            Symbol.Percent => BinaryOperatorSyntax.Modulo,
            Symbol.LeftShift => BinaryOperatorSyntax.LeftShift,
            Symbol.RightShift => BinaryOperatorSyntax.RightShift,
            Symbol.Ampersand => BinaryOperatorSyntax.BitwiseAnd,
            Symbol.Pipe => BinaryOperatorSyntax.BitwiseOr,
            Symbol.Caret => BinaryOperatorSyntax.BitwiseXor,
            _ => null
        };

        return binaryExpressionOperator is not null;
    }

    /// <summary>
    /// Parses a primary expression (literal, identifier, parenthesized expression, unary '-' / '!',
    /// array initializer, struct initializer) followed by any postfix operators.
    /// </summary>
    private ExpressionSyntax ParsePrimaryExpression()
    {
        var startIndex = _tokenIndex;
        var token = ExpectToken();

        ExpressionSyntax expr = token switch
        {
            LiteralToken literal => new LiteralSyntax(GetTokens(startIndex), literal.Value, literal.Kind),
            // NOTE(review): the qualifier is assumed to be Optional<string> — confirm against IdentifierSyntax.
            IdentifierToken identifier => new IdentifierSyntax(GetTokens(startIndex), Optional<string>.Empty(), identifier.Value),
            SymbolToken symbolToken => symbolToken.Symbol switch
            {
                Symbol.OpenParen => ParseParenthesizedExpression(),
                Symbol.Minus => ParseUnaryExpression(startIndex, UnaryOperatorSyntax.Negate),
                Symbol.Bang => ParseUnaryExpression(startIndex, UnaryOperatorSyntax.Invert),
                Symbol.OpenBracket => ParseArrayInitializer(startIndex),
                Symbol.OpenBrace => ParseUntypedStructInitializer(startIndex),
                Symbol.Struct => ParseStructInitializer(startIndex),
                _ => throw new ParseException(Diagnostic
                    .Error($"Unexpected symbol '{symbolToken.Symbol}' in expression")
                    .WithHelp("Expected '(', '-', '!', '[' or '{'")
                    .At(symbolToken)
                    .Build())
            },
            _ => throw new ParseException(Diagnostic
                .Error($"Unexpected token '{token.GetType().Name}' in expression")
                .WithHelp("Expected literal, identifier, or parenthesized expression")
                .At(token)
                .Build())
        };

        return ParsePostfixOperators(expr, startIndex);
    }

    /// <summary>
    /// Parses the operand of a unary operator whose symbol has already been consumed.
    /// The operand is parsed before the token range is taken so the node covers operator and operand.
    /// </summary>
    private UnaryExpressionSyntax ParseUnaryExpression(int startIndex, UnaryOperatorSyntax unaryOperator)
    {
        var operand = ParsePrimaryExpression();
        return new UnaryExpressionSyntax(GetTokens(startIndex), unaryOperator, operand);
    }

    /// <summary>
    /// Parses a struct initializer without a type, whose '{' has already been consumed.
    /// The body is parsed before the token range is taken so the node covers the whole initializer.
    /// </summary>
    private StructInitializerSyntax ParseUntypedStructInitializer(int startIndex)
    {
        var initializers = ParseStructInitializerBody();
        return new StructInitializerSyntax(GetTokens(startIndex), Optional<TypeSyntax>.Empty(), initializers);
    }

    /// <summary>Parses the rest of a parenthesized expression after '(' and returns the inner expression.</summary>
    private ExpressionSyntax ParseParenthesizedExpression()
    {
        var expression = ParseExpression();
        ExpectSymbol(Symbol.CloseParen);
        return expression;
    }

    /// <summary>
    /// Wraps <paramref name="expr"/> in postfix operations for as long as one follows:
    /// address-of, dereference, member access / member call, array indexing and function call.
    /// </summary>
    /// <param name="expr">The expression the postfix operators apply to.</param>
    /// <param name="startIndex">Index of the first token of <paramref name="expr"/>, so each node's tokens include its operand.</param>
    private ExpressionSyntax ParsePostfixOperators(ExpressionSyntax expr, int startIndex)
    {
        // Because these are tried directly after every primary expression, a following ampersand
        // or caret symbol is always taken as address-of / dereference here.
        while (HasToken)
        {
            if (TryExpectSymbol(Symbol.Ampersand))
            {
                expr = new AddressOfSyntax(GetTokens(startIndex), expr);
            }
            else if (TryExpectSymbol(Symbol.Caret))
            {
                expr = new DereferenceSyntax(GetTokens(startIndex), expr);
            }
            else if (TryExpectSymbol(Symbol.Period))
            {
                var member = ExpectIdentifier().Value;

                if (TryExpectSymbol(Symbol.OpenParen))
                {
                    var parameters = ParseCallArguments();
                    expr = new DotFuncCallSyntax(GetTokens(startIndex), member, expr, parameters);
                }
                else
                {
                    expr = new StructFieldAccessSyntax(GetTokens(startIndex), expr, member);
                }
            }
            else if (TryExpectSymbol(Symbol.OpenBracket))
            {
                var index = ParseExpression();
                ExpectSymbol(Symbol.CloseBracket);
                expr = new ArrayIndexAccessSyntax(GetTokens(startIndex), expr, index);
            }
            else if (TryExpectSymbol(Symbol.OpenParen))
            {
                var parameters = ParseCallArguments();
                expr = new FuncCallSyntax(GetTokens(startIndex), expr, parameters);
            }
            else
            {
                break;
            }
        }

        return expr;
    }

    /// <summary>
    /// Parses comma-separated call arguments up to and including the closing ')'.
    /// The opening '(' must already have been consumed.
    /// </summary>
    private List<ExpressionSyntax> ParseCallArguments()
    {
        var arguments = new List<ExpressionSyntax>();

        while (!TryExpectSymbol(Symbol.CloseParen))
        {
            arguments.Add(ParseExpression());

            // No comma after an argument means the list must end here.
            if (!TryExpectSymbol(Symbol.Comma))
            {
                ExpectSymbol(Symbol.CloseParen);
                break;
            }
        }

        return arguments;
    }

    /// <summary>Parses an array initializer after its '[': capacity expression, ']' and element type.</summary>
    private ArrayInitializerSyntax ParseArrayInitializer(int startIndex)
    {
        var capacity = ParseExpression();
        ExpectSymbol(Symbol.CloseBracket);
        var type = ParseType();

        return new ArrayInitializerSyntax(GetTokens(startIndex), capacity, type);
    }

    /// <summary>
    /// Parses a struct initializer after its 'struct' symbol: an optional type, then '{' and the
    /// initializer body.
    /// </summary>
    private StructInitializerSyntax ParseStructInitializer(int startIndex)
    {
        var type = Optional<TypeSyntax>.Empty();
        if (!TryExpectSymbol(Symbol.OpenBrace))
        {
            type = ParseType();
            ExpectSymbol(Symbol.OpenBrace);
        }

        var initializers = ParseStructInitializerBody();

        return new StructInitializerSyntax(GetTokens(startIndex), type, initializers);
    }

    /// <summary>
    /// Parses field initializers (identifier, assign symbol, expression) up to and including the
    /// closing '}'. The opening '{' must already have been consumed.
    /// </summary>
    /// <exception cref="ParseException">The same field name is initialized more than once.</exception>
    private Dictionary<string, ExpressionSyntax> ParseStructInitializerBody()
    {
        var initializers = new Dictionary<string, ExpressionSyntax>();

        while (!TryExpectSymbol(Symbol.CloseBrace))
        {
            var name = ExpectIdentifier();
            ExpectSymbol(Symbol.Assign);
            var value = ParseExpression();

            // Report a repeated field as a diagnostic instead of letting the dictionary throw.
            if (!initializers.TryAdd(name.Value, value))
            {
                throw new ParseException(Diagnostic
                    .Error($"Duplicate initializer for field '{name.Value}'")
                    .WithHelp("Remove the repeated field so each field is initialized only once")
                    .At(name)
                    .Build());
            }
        }

        return initializers;
    }

    /// <summary>
    /// Parses statements up to and including the closing '}'. The opening '{' must already have
    /// been consumed by the caller. A failing statement is recorded as a diagnostic and parsing
    /// continues with the following tokens.
    /// </summary>
    private BlockSyntax ParseBlock()
    {
        var startIndex = _tokenIndex;
        List<StatementSyntax> statements = [];

        while (!TryExpectSymbol(Symbol.CloseBrace))
        {
            try
            {
                statements.Add(ParseStatement());
            }
            // Recover locally only while tokens remain. Once the input is exhausted the closing
            // brace can never be found, so the error must propagate or this loop would never end.
            catch (ParseException ex) when (HasToken)
            {
                _diagnostics.Add(ex.Diagnostic);
                Next();
            }
        }

        return new BlockSyntax(GetTokens(startIndex), statements);
    }

    /// <summary>
    /// Parses a type: a sized integer or float name (u8..u64, i8..i64, f32, f64), one of the
    /// built-in names (void, string, cstring, bool), any other identifier as a custom type in the
    /// current module, a caret-prefixed pointer type, a 'func(...)' type, or a '[]'-prefixed array type.
    /// </summary>
    /// <exception cref="ParseException">The size suffix is unsupported or no type form matches.</exception>
    private TypeSyntax ParseType()
    {
        var startIndex = _tokenIndex;

        if (TryExpectIdentifier(out var name))
        {
            if (name.Value[0] == 'u' && int.TryParse(name.Value[1..], out var size))
            {
                if (size is not 8 and not 16 and not 32 and not 64)
                {
                    throw new ParseException(Diagnostic
                        .Error("Arbitrary uint size is not supported")
                        .WithHelp("Use u8, u16, u32 or u64")
                        .At(name)
                        .Build());
                }

                return new IntTypeSyntax(GetTokens(startIndex), false, size);
            }

            if (name.Value[0] == 'i' && int.TryParse(name.Value[1..], out size))
            {
                if (size is not 8 and not 16 and not 32 and not 64)
                {
                    throw new ParseException(Diagnostic
                        .Error("Arbitrary int size is not supported")
                        .WithHelp("Use i8, i16, i32 or i64")
                        .At(name)
                        .Build());
                }

                return new IntTypeSyntax(GetTokens(startIndex), true, size);
            }

            if (name.Value[0] == 'f' && int.TryParse(name.Value[1..], out size))
            {
                if (size is not 32 and not 64)
                {
                    throw new ParseException(Diagnostic
                        .Error("Arbitrary float size is not supported")
                        .WithHelp("Use f32 or f64")
                        .At(name)
                        .Build());
                }

                return new FloatTypeSyntax(GetTokens(startIndex), size);
            }

            return name.Value switch
            {
                "void" => new VoidTypeSyntax(GetTokens(startIndex)),
                "string" => new StringTypeSyntax(GetTokens(startIndex)),
                "cstring" => new CStringTypeSyntax(GetTokens(startIndex)),
                "bool" => new BoolTypeSyntax(GetTokens(startIndex)),
                _ => new CustomTypeSyntax(GetTokens(startIndex), _moduleName, name.Value)
            };
        }

        if (TryExpectSymbol(Symbol.Caret))
        {
            var baseType = ParseType();
            return new PointerTypeSyntax(GetTokens(startIndex), baseType);
        }

        if (TryExpectSymbol(Symbol.Func))
        {
            ExpectSymbol(Symbol.OpenParen);

            List<TypeSyntax> parameters = [];
            while (!TryExpectSymbol(Symbol.CloseParen))
            {
                parameters.Add(ParseType());

                // No comma after a parameter type means the list must end here.
                if (!TryExpectSymbol(Symbol.Comma))
                {
                    ExpectSymbol(Symbol.CloseParen);
                    break;
                }
            }

            var returnType = TryExpectSymbol(Symbol.Colon) ? ParseType() : new VoidTypeSyntax([]);

            return new FuncTypeSyntax(GetTokens(startIndex), parameters, returnType);
        }

        if (TryExpectSymbol(Symbol.OpenBracket))
        {
            ExpectSymbol(Symbol.CloseBracket);
            var baseType = ParseType();
            return new ArrayTypeSyntax(GetTokens(startIndex), baseType);
        }

        // Nothing was consumed on this path; CurrentToken is null here when the input has ended.
        throw new ParseException(Diagnostic
            .Error("Invalid type syntax")
            .WithHelp("Expected type name, '^' for pointer, or '[]' for array")
            .At(CurrentToken)
            .Build());
    }

    /// <summary>Consumes and returns the current token.</summary>
    /// <exception cref="ParseException">There are no tokens left.</exception>
    private Token ExpectToken()
    {
        if (!HasToken)
        {
            // NOTE(review): _tokens[^1] throws when the token list is empty, so parsing an empty
            // input fails with an index exception instead of a diagnostic — confirm whether
            // callers guarantee at least one token.
            throw new ParseException(Diagnostic
                .Error("Unexpected end of file")
                .WithHelp("Expected more tokens to complete the syntax")
                .At(_tokens[^1])
                .Build());
        }

        var token = CurrentToken!;
        Next();
        return token;
    }

    /// <summary>Consumes the current token and returns it as a symbol token.</summary>
    /// <exception cref="ParseException">The input has ended or the token is not a symbol. The token is consumed either way.</exception>
    private SymbolToken ExpectSymbol()
    {
        var token = ExpectToken();
        if (token is not SymbolToken symbol)
        {
            throw new ParseException(Diagnostic
                .Error($"Expected symbol, but found {token.GetType().Name}")
                .WithHelp("This position requires a symbol like '(', ')', '{', '}', etc.")
                .At(token)
                .Build());
        }

        return symbol;
    }

    /// <summary>Consumes the current token and requires it to be <paramref name="expectedSymbol"/>.</summary>
    /// <exception cref="ParseException">The input has ended or the token is a different symbol or not a symbol.</exception>
    private void ExpectSymbol(Symbol expectedSymbol)
    {
        var token = ExpectSymbol();
        if (token.Symbol != expectedSymbol)
        {
            throw new ParseException(Diagnostic
                .Error($"Expected '{expectedSymbol}', but found '{token.Symbol}'")
                .WithHelp($"Insert '{expectedSymbol}' here")
                .At(token)
                .Build());
        }
    }

    /// <summary>
    /// Consumes the current token only if it is the symbol <paramref name="symbol"/>.
    /// </summary>
    /// <returns><c>true</c> if the symbol was consumed; otherwise <c>false</c> and the position is unchanged.</returns>
    private bool TryExpectSymbol(Symbol symbol)
    {
        if (CurrentToken is SymbolToken symbolToken && symbolToken.Symbol == symbol)
        {
            Next();
            return true;
        }

        return false;
    }

    /// <summary>
    /// Consumes the current token only if it is an identifier.
    /// </summary>
    /// <returns><c>true</c> and the identifier if one was consumed; otherwise <c>false</c> and the position is unchanged.</returns>
    private bool TryExpectIdentifier([NotNullWhen(true)] out IdentifierToken? identifier)
    {
        if (CurrentToken is IdentifierToken identifierToken)
        {
            identifier = identifierToken;
            Next();
            return true;
        }

        identifier = null;
        return false;
    }

    /// <summary>Consumes the current token and returns it as an identifier token.</summary>
    /// <exception cref="ParseException">The input has ended or the token is not an identifier. The token is consumed either way.</exception>
    private IdentifierToken ExpectIdentifier()
    {
        var token = ExpectToken();
        if (token is not IdentifierToken identifier)
        {
            throw new ParseException(Diagnostic
                .Error($"Expected identifier, but found {token.GetType().Name}")
                .WithHelp("Provide a valid identifier name here")
                .At(token)
                .Build());
        }

        return identifier;
    }

    /// <summary>Consumes the current token and returns it as a literal token.</summary>
    /// <exception cref="ParseException">The input has ended or the token is not a literal. The token is consumed either way.</exception>
    private LiteralToken ExpectLiteral()
    {
        var token = ExpectToken();
        if (token is not LiteralToken literal)
        {
            throw new ParseException(Diagnostic
                .Error($"Expected literal, but found {token.GetType().Name}")
                .WithHelp("Provide a valid literal name here")
                .At(token)
                .Build());
        }

        return literal;
    }

    /// <summary>Consumes the current token and requires it to be a literal of kind <paramref name="kind"/>.</summary>
    /// <exception cref="ParseException">The input has ended, the token is not a literal, or it is a literal of another kind.</exception>
    private LiteralToken ExpectLiteral(LiteralKind kind)
    {
        var literal = ExpectLiteral();
        if (literal.Kind != kind)
        {
            throw new ParseException(Diagnostic
                .Error($"Expected {kind} literal, but found {literal.Kind}")
                .WithHelp($"Provide a {kind} literal name here")
                .At(literal)
                .Build());
        }

        return literal;
    }

    /// <summary>
    /// Consumes the current token only if it is a literal of kind <paramref name="kind"/>.
    /// </summary>
    /// <returns><c>true</c> and the literal if one was consumed; otherwise <c>false</c> and the position is unchanged.</returns>
    private bool TryExpectLiteral(LiteralKind kind, [NotNullWhen(true)] out LiteralToken? literal)
    {
        // A literal of a different kind must not match and must not be consumed.
        if (CurrentToken is LiteralToken literalToken && literalToken.Kind == kind)
        {
            literal = literalToken;
            Next();
            return true;
        }

        literal = null;
        return false;
    }

    /// <summary>Advances the position by one token. No bounds check is performed.</summary>
    private void Next()
    {
        _tokenIndex++;
    }

    /// <summary>
    /// Returns the tokens from <paramref name="tokenStartIndex"/> up to, but not including, the
    /// current position. The end of the range is fixed at the time of the call, so call this
    /// after the node's children have been parsed.
    /// </summary>
    private IEnumerable<Token> GetTokens(int tokenStartIndex)
    {
        return _tokens.Skip(tokenStartIndex).Take(_tokenIndex - tokenStartIndex);
    }
}

/// <summary>
/// Thrown by the parser when the tokens do not match the expected syntax. Carries the
/// <see cref="Diagnostic"/> describing the error; the exception message is the diagnostic's message.
/// </summary>
public class ParseException : Exception
{
    /// <summary>The diagnostic describing the syntax error.</summary>
    public Diagnostic Diagnostic { get; }

    /// <summary>Creates the exception from <paramref name="diagnostic"/>.</summary>
    public ParseException(Diagnostic diagnostic) : base(diagnostic.Message)
    {
        Diagnostic = diagnostic;
    }
}