using System.Diagnostics.CodeAnalysis;
using Common;
using Syntax.Diagnostics;
using Syntax.Node;
using Syntax.Tokenization;

namespace Syntax.Parsing;

/// <summary>
/// Recursive-descent parser that turns a token sequence into a <see cref="SyntaxTree"/>.
/// </summary>
/// <remarks>
/// All parser state is held in static fields that <see cref="ParseFile"/> resets on entry,
/// so only one file can be parsed at a time.
/// </remarks>
public static class Parser
{
    private static string _namespace = null!;

    // Return type of the function whose body is currently being parsed; read by ParseReturn.
    private static NubType? _functionReturnType;

    private static List<Diagnostic> _diagnostics = [];

    // Materialized once per ParseFile call so Peek is an O(1) index lookup.
    private static List<Token> _tokens = [];

    private static int _index;

    /// <summary>
    /// Parses a whole file: an optional namespace declaration followed by definitions.
    /// </summary>
    /// <param name="tokens">The tokens of the file, in source order.</param>
    /// <param name="diagnostics">Receives every warning and error collected while parsing.</param>
    /// <returns>The syntax tree, or <c>null</c> when at least one definition failed to parse.</returns>
    public static SyntaxTree? ParseFile(IEnumerable<Token> tokens, out IEnumerable<Diagnostic> diagnostics)
    {
        _tokens = [.. tokens];
        _namespace = "global";
        _diagnostics = [];
        _index = 0;
        _functionReturnType = null;

        var failed = false;
        List<DefinitionNode> definitions = [];

        // The namespace declaration can fail too, so it must be guarded like the definitions.
        try
        {
            if (TryExpectSymbol(Symbol.Namespace))
            {
                _namespace = ExpectIdentifier().Value;
            }
        }
        catch (ParseException ex)
        {
            _diagnostics.Add(ex.Diagnostic);
            failed = true;
            RecoverToNextDefinition();
        }

        // ParseDefinition always consumes at least one token when one is available,
        // so this loop terminates even when every definition fails.
        while (Peek().HasValue)
        {
            try
            {
                definitions.Add(ParseDefinition());
            }
            catch (ParseException ex)
            {
                _diagnostics.Add(ex.Diagnostic);
                failed = true;
                RecoverToNextDefinition();
            }
        }

        diagnostics = _diagnostics;
        return failed ? null : new SyntaxTree(_namespace, definitions);
    }

    /// <summary>
    /// Parses leading modifiers and then dispatches on the definition keyword.
    /// </summary>
    private static DefinitionNode ParseDefinition()
    {
        var startIndex = _index;

        List<ModifierToken> modifiers = [];
        while (TryExpectModifier(out var modifier))
        {
            modifiers.Add(modifier);
        }

        var keyword = ExpectSymbol();

        // NOTE(review): the message below only names 'func' and 'struct' although trait and
        // impl definitions are accepted too; left unchanged because the keyword spellings
        // are not visible in this file.
        return keyword.Symbol switch
        {
            Symbol.Func => ParseFuncDefinition(startIndex, modifiers),
            Symbol.Struct => ParseStruct(startIndex, modifiers),
            Symbol.Trait => ParseTrait(startIndex, modifiers),
            Symbol.Impl => ParseImplementation(startIndex, modifiers),
            _ => throw new ParseException(Diagnostic
                .Error($"Expected 'func' or 'struct', but found '{keyword.Symbol}'")
                .WithHelp("Valid definition keywords are 'func' and 'struct'")
                .At(keyword)
                .Build())
        };
    }

    /// <summary>
    /// Parses a function definition after the 'func' keyword. Functions carrying the extern
    /// modifier have no body and may name a different call target.
    /// </summary>
    private static DefinitionNode ParseFuncDefinition(int startIndex, List<ModifierToken> modifiers)
    {
        var name = ExpectIdentifier();

        ExpectSymbol(Symbol.OpenParen);
        var parameters = ParseParenthesizedList<FuncParameter>(
            ParseFuncParameter,
            "Missing comma between function parameters",
            "Add a ',' to separate parameters");

        var returnType = ParseOptionalReturnType();

        var isExtern = modifiers.RemoveAll(x => x.Modifier == Modifier.Extern) > 0;
        if (isExtern)
        {
            if (modifiers.Count != 0)
            {
                throw new ParseException(Diagnostic
                    .Error($"Invalid modifier for extern function: {modifiers[0].Modifier}")
                    .WithHelp($"Extern functions cannot use the '{modifiers[0].Modifier}' modifier")
                    .At(modifiers[0])
                    .Build());
            }

            var callName = name.Value;
            if (TryExpectSymbol(Symbol.Calls))
            {
                callName = ExpectIdentifier().Value;
            }

            return new ExternFuncDefinitionNode(GetTokensForNode(startIndex), _namespace, name.Value, callName, parameters, returnType);
        }

        var body = ParseFunctionBody(returnType);

        var exported = modifiers.RemoveAll(x => x.Modifier == Modifier.Export) > 0;
        if (modifiers.Count != 0)
        {
            throw new ParseException(Diagnostic
                .Error($"Invalid modifiers for function: {modifiers[0].Modifier}")
                .WithHelp($"Functions cannot use the '{modifiers[0].Modifier}' modifier")
                .At(modifiers[0])
                .Build());
        }

        return new LocalFuncDefinitionNode(GetTokensForNode(startIndex), _namespace, name.Value, parameters, body, returnType, exported);
    }

    /// <summary>
    /// Parses a struct definition after the 'struct' keyword: a name and a brace-delimited
    /// list of fields, each optionally followed by a default value.
    /// </summary>
    private static StructDefinitionNode ParseStruct(int startIndex, List<ModifierToken> modifiers)
    {
        var name = ExpectIdentifier().Value;

        ExpectSymbol(Symbol.OpenBrace);

        List<StructField> variables = [];
        while (!TryExpectSymbol(Symbol.CloseBrace))
        {
            var variableName = ExpectIdentifier().Value;
            ExpectSymbol(Symbol.Colon);
            var variableType = ParseType();

            var variableValue = Optional<ExpressionNode>.Empty();
            if (TryExpectSymbol(Symbol.Assign))
            {
                variableValue = ParseExpression();
            }

            variables.Add(new StructField(variableName, variableType, variableValue));
        }

        if (modifiers.Count != 0)
        {
            throw new ParseException(Diagnostic
                .Error($"Invalid modifiers for struct: {modifiers[0].Modifier}")
                .WithHelp($"Structs cannot use the '{modifiers[0].Modifier}' modifier")
                .At(modifiers[0])
                .Build());
        }

        return new StructDefinitionNode(GetTokensForNode(startIndex), _namespace, name, variables);
    }

    /// <summary>
    /// Parses a trait definition after the 'trait' keyword: a name and a brace-delimited
    /// list of function signatures without bodies.
    /// </summary>
    private static TraitDefinitionNode ParseTrait(int startIndex, List<ModifierToken> modifiers)
    {
        var name = ExpectIdentifier().Value;

        ExpectSymbol(Symbol.OpenBrace);

        List<TraitFunc> functions = [];
        while (!TryExpectSymbol(Symbol.CloseBrace))
        {
            ExpectSymbol(Symbol.Func);
            var funcName = ExpectIdentifier().Value;

            ExpectSymbol(Symbol.OpenParen);
            var parameters = ParseParenthesizedList<FuncParameter>(
                ParseFuncParameter,
                "Missing comma between function arguments",
                "Add a ',' to separate arguments");

            var returnType = ParseOptionalReturnType();

            functions.Add(new TraitFunc(funcName, parameters, returnType));
        }

        // Checked after the member loop (as for structs and implementations) so that a
        // trait without any functions is validated as well.
        if (modifiers.Count != 0)
        {
            throw new ParseException(Diagnostic
                .Error($"Invalid modifiers for trait: {modifiers[0].Modifier}")
                .WithHelp($"Traits cannot use the '{modifiers[0].Modifier}' modifier")
                .At(modifiers[0])
                .Build());
        }

        return new TraitDefinitionNode(GetTokensForNode(startIndex), _namespace, name, functions);
    }

    /// <summary>
    /// Parses a trait implementation after the 'impl' keyword: the trait type, the target
    /// type and a brace-delimited list of functions. Every function receives an implicit
    /// leading "this" parameter of the target type.
    /// </summary>
    private static TraitImplementationDefinitionNode ParseImplementation(int startIndex, List<ModifierToken> modifiers)
    {
        var traitType = ParseType();
        ExpectSymbol(Symbol.For);
        var forType = ParseType();

        List<ImplementationFuncNode> functions = [];

        ExpectSymbol(Symbol.OpenBrace);
        while (!TryExpectSymbol(Symbol.CloseBrace))
        {
            var funcStartIndex = _index;
            ExpectSymbol(Symbol.Func);
            var functionName = ExpectIdentifier().Value;

            var parameters = new List<FuncParameter> { new("this", forType) };

            ExpectSymbol(Symbol.OpenParen);
            parameters.AddRange(ParseParenthesizedList<FuncParameter>(
                ParseFuncParameter,
                "Missing comma between function parameters",
                "Add a ',' to separate parameters"));

            var returnType = ParseOptionalReturnType();
            var body = ParseFunctionBody(returnType);

            functions.Add(new ImplementationFuncNode(GetTokensForNode(funcStartIndex), functionName, parameters, returnType, body));
        }

        if (modifiers.Count != 0)
        {
            throw new ParseException(Diagnostic
                .Error($"Invalid modifiers for implementation: {modifiers[0].Modifier}")
                .WithHelp($"Implementations cannot use the '{modifiers[0].Modifier}' modifier")
                .At(modifiers[0])
                .Build());
        }

        return new TraitImplementationDefinitionNode(GetTokensForNode(startIndex), _namespace, traitType, forType, functions);
    }

    /// <summary>
    /// Parses a single "name: type" parameter.
    /// </summary>
    private static FuncParameter ParseFuncParameter()
    {
        var name = ExpectIdentifier();
        ExpectSymbol(Symbol.Colon);
        var type = ParseType();

        return new FuncParameter(name.Value, type);
    }

    /// <summary>
    /// Parses the items of a parenthesized list whose opening '(' has already been consumed,
    /// up to and including the closing ')'. A missing comma between two items is reported as
    /// a warning and parsing continues.
    /// </summary>
    /// <param name="parseItem">Parses one list item.</param>
    /// <param name="missingCommaMessage">Warning text used when a separator is missing.</param>
    /// <param name="missingCommaHelp">Help text attached to that warning.</param>
    private static List<T> ParseParenthesizedList<T>(Func<T> parseItem, string missingCommaMessage, string missingCommaHelp)
    {
        List<T> items = [];

        while (!TryExpectSymbol(Symbol.CloseParen))
        {
            items.Add(parseItem());

            if (!TryExpectSymbol(Symbol.Comma)
                && Peek().TryGetValue(out var nextToken)
                && nextToken is not SymbolToken { Symbol: Symbol.CloseParen })
            {
                _diagnostics.Add(Diagnostic
                    .Warning(missingCommaMessage)
                    .WithHelp(missingCommaHelp)
                    .At(nextToken)
                    .Build());
            }
        }

        return items;
    }

    /// <summary>
    /// Parses an optional ": type" return annotation; without one the return type is void.
    /// </summary>
    private static NubType ParseOptionalReturnType()
    {
        return TryExpectSymbol(Symbol.Colon) ? ParseType() : new NubVoidType();
    }

    /// <summary>
    /// Parses a function body with <paramref name="returnType"/> as the active return type,
    /// restoring the previous one afterwards so nested anonymous functions do not leak
    /// their return type into the enclosing function.
    /// </summary>
    private static BlockNode ParseFunctionBody(NubType returnType)
    {
        var outerReturnType = _functionReturnType;
        _functionReturnType = returnType;
        try
        {
            return ParseBlock();
        }
        finally
        {
            _functionReturnType = outerReturnType;
        }
    }

    /// <summary>
    /// Parses one statement, choosing the statement kind from the next token without
    /// consuming it; anything that is not a statement keyword is parsed as an expression
    /// statement or assignment.
    /// </summary>
    private static StatementNode ParseStatement()
    {
        var startIndex = _index;

        if (!Peek().TryGetValue(out var token))
        {
            throw new ParseException(Diagnostic
                .Error("Unexpected end of file while parsing statement")
                .At(_tokens[^1])
                .Build());
        }

        return token switch
        {
            SymbolToken { Symbol: Symbol.Return } => ParseReturn(startIndex),
            SymbolToken { Symbol: Symbol.If } => ParseIf(startIndex),
            SymbolToken { Symbol: Symbol.While } => ParseWhile(startIndex),
            SymbolToken { Symbol: Symbol.Let } => ParseVariableDeclaration(startIndex),
            SymbolToken { Symbol: Symbol.Break } => ParseBreak(startIndex),
            SymbolToken { Symbol: Symbol.Continue } => ParseContinue(startIndex),
            _ => ParseStatementExpression(startIndex)
        };
    }

    /// <summary>
    /// Parses an expression statement, or an assignment when the expression is followed
    /// by the assign symbol.
    /// </summary>
    private static StatementNode ParseStatementExpression(int startIndex)
    {
        var expr = ParseExpression();

        if (TryExpectSymbol(Symbol.Assign))
        {
            var value = ParseExpression();
            return new AssignmentNode(GetTokensForNode(startIndex), expr, value);
        }

        return new StatementExpressionNode(GetTokensForNode(startIndex), expr);
    }

    /// <summary>
    /// Parses "let name", optionally followed by ": type" and by an initial value.
    /// </summary>
    private static VariableDeclarationNode ParseVariableDeclaration(int startIndex)
    {
        ExpectSymbol(Symbol.Let);
        var name = ExpectIdentifier().Value;

        var explicitType = Optional<NubType>.Empty();
        if (TryExpectSymbol(Symbol.Colon))
        {
            explicitType = ParseType();
        }

        var assignment = Optional<ExpressionNode>.Empty();
        if (TryExpectSymbol(Symbol.Assign))
        {
            assignment = ParseExpression();
        }

        return new VariableDeclarationNode(GetTokensForNode(startIndex), name, explicitType, assignment);
    }

    /// <summary>
    /// Parses a break statement.
    /// </summary>
    private static StatementNode ParseBreak(int startIndex)
    {
        // ExpectSymbol already consumes the keyword; advancing again would swallow the
        // token that follows the statement.
        ExpectSymbol(Symbol.Break);
        return new BreakNode(GetTokensForNode(startIndex));
    }

    /// <summary>
    /// Parses a continue statement.
    /// </summary>
    private static StatementNode ParseContinue(int startIndex)
    {
        ExpectSymbol(Symbol.Continue);
        return new ContinueNode(GetTokensForNode(startIndex));
    }

    /// <summary>
    /// Parses a return statement. A value expression is parsed unless the active function
    /// return type is void.
    /// </summary>
    private static ReturnNode ParseReturn(int startIndex)
    {
        ExpectSymbol(Symbol.Return);

        var value = Optional<ExpressionNode>.Empty();
        if (_functionReturnType is not NubVoidType)
        {
            value = ParseExpression();
        }

        return new ReturnNode(GetTokensForNode(startIndex), value);
    }

    /// <summary>
    /// Parses an if statement with an optional else branch, which is either another if
    /// statement or a block.
    /// </summary>
    private static IfNode ParseIf(int startIndex)
    {
        ExpectSymbol(Symbol.If);
        var condition = ParseExpression();
        var body = ParseBlock();

        var elseStatement = Optional<Variant<IfNode, BlockNode>>.Empty();
        if (TryExpectSymbol(Symbol.Else))
        {
            var elseStartIndex = _index;

            // Only peek at 'if' here: the nested ParseIf call consumes the keyword itself.
            elseStatement = Peek() is { Value: SymbolToken { Symbol: Symbol.If } }
                ? (Variant<IfNode, BlockNode>)ParseIf(elseStartIndex)
                : (Variant<IfNode, BlockNode>)ParseBlock();
        }

        return new IfNode(GetTokensForNode(startIndex), condition, body, elseStatement);
    }

    /// <summary>
    /// Parses a while loop: a condition expression followed by a block.
    /// </summary>
    private static WhileNode ParseWhile(int startIndex)
    {
        ExpectSymbol(Symbol.While);
        var condition = ParseExpression();
        var body = ParseBlock();

        return new WhileNode(GetTokensForNode(startIndex), condition, body);
    }

    /// <summary>
    /// Parses a binary expression by precedence climbing.
    /// </summary>
    /// <param name="precedence">Lowest operator precedence this call is allowed to consume.</param>
    private static ExpressionNode ParseExpression(int precedence = 0)
    {
        var startIndex = _index;
        var left = ParsePrimaryExpression();

        while (true)
        {
            var token = Peek();
            if (!token.HasValue
                || token.Value is not SymbolToken symbolToken
                || !TryGetBinaryOperator(symbolToken.Symbol, out var op)
                || GetBinaryOperatorPrecedence(op.Value) < precedence)
            {
                break;
            }

            Next();

            // Parsing the right side one level higher makes operators left-associative.
            var right = ParseExpression(GetBinaryOperatorPrecedence(op.Value) + 1);
            left = new BinaryExpressionNode(GetTokensForNode(startIndex), left, op.Value, right);
        }

        return left;
    }

    /// <summary>
    /// Returns the binding strength of a binary operator; higher values bind tighter.
    /// </summary>
    private static int GetBinaryOperatorPrecedence(BinaryExpressionOperator binaryExpressionOperator)
    {
        return binaryExpressionOperator switch
        {
            BinaryExpressionOperator.Multiply or BinaryExpressionOperator.Divide => 3,
            BinaryExpressionOperator.Plus or BinaryExpressionOperator.Minus => 2,
            BinaryExpressionOperator.GreaterThan
                or BinaryExpressionOperator.GreaterThanOrEqual
                or BinaryExpressionOperator.LessThan
                or BinaryExpressionOperator.LessThanOrEqual => 1,
            BinaryExpressionOperator.Equal or BinaryExpressionOperator.NotEqual => 0,
            _ => throw new ArgumentOutOfRangeException(nameof(binaryExpressionOperator), binaryExpressionOperator, null)
        };
    }

    /// <summary>
    /// Maps a symbol to the binary operator it denotes.
    /// </summary>
    /// <returns><c>true</c> when <paramref name="symbol"/> is a binary operator.</returns>
    private static bool TryGetBinaryOperator(Symbol symbol, [NotNullWhen(true)] out BinaryExpressionOperator? binaryExpressionOperator)
    {
        binaryExpressionOperator = symbol switch
        {
            Symbol.Equal => BinaryExpressionOperator.Equal,
            Symbol.NotEqual => BinaryExpressionOperator.NotEqual,
            Symbol.LessThan => BinaryExpressionOperator.LessThan,
            Symbol.LessThanOrEqual => BinaryExpressionOperator.LessThanOrEqual,
            Symbol.GreaterThan => BinaryExpressionOperator.GreaterThan,
            Symbol.GreaterThanOrEqual => BinaryExpressionOperator.GreaterThanOrEqual,
            Symbol.Plus => BinaryExpressionOperator.Plus,
            Symbol.Minus => BinaryExpressionOperator.Minus,
            Symbol.Star => BinaryExpressionOperator.Multiply,
            Symbol.ForwardSlash => BinaryExpressionOperator.Divide,
            _ => null
        };

        return binaryExpressionOperator is not null;
    }

    /// <summary>
    /// Parses a primary expression (literal, identifier, anonymous function, parenthesized
    /// expression, unary operation, array initializer or struct initializer) followed by
    /// any postfix operators.
    /// </summary>
    private static ExpressionNode ParsePrimaryExpression()
    {
        var startIndex = _index;
        ExpressionNode expr;

        var token = ExpectToken();
        switch (token)
        {
            case LiteralToken literal:
            {
                expr = new LiteralNode(GetTokensForNode(startIndex), literal.Value, literal.Kind);
                break;
            }
            case IdentifierToken identifier:
            {
                var @namespace = Optional<string>.Empty();
                var name = identifier.Value;

                // "ns::name": the first identifier is the namespace, the second the name.
                if (TryExpectSymbol(Symbol.DoubleColon))
                {
                    @namespace = identifier.Value;
                    name = ExpectIdentifier().Value;
                }

                expr = new IdentifierNode(GetTokensForNode(startIndex), @namespace, name);
                break;
            }
            case SymbolToken symbolToken:
            {
                switch (symbolToken.Symbol)
                {
                    case Symbol.Func:
                    {
                        ExpectSymbol(Symbol.OpenParen);
                        var parameters = ParseParenthesizedList<FuncParameter>(
                            ParseFuncParameter,
                            "Missing comma between function arguments",
                            "Add a ',' to separate arguments");

                        var returnType = ParseOptionalReturnType();
                        var body = ParseFunctionBody(returnType);

                        expr = new AnonymousFuncNode(GetTokensForNode(startIndex), parameters, body, returnType);
                        break;
                    }
                    case Symbol.OpenParen:
                    {
                        var expression = ParseExpression();
                        ExpectSymbol(Symbol.CloseParen);
                        expr = expression;
                        break;
                    }
                    case Symbol.Minus:
                    {
                        var expression = ParsePrimaryExpression();
                        expr = new UnaryExpressionNode(GetTokensForNode(startIndex), UnaryExpressionOperator.Negate, expression);
                        break;
                    }
                    case Symbol.Bang:
                    {
                        var expression = ParsePrimaryExpression();
                        expr = new UnaryExpressionNode(GetTokensForNode(startIndex), UnaryExpressionOperator.Invert, expression);
                        break;
                    }
                    case Symbol.OpenBracket:
                    {
                        var capacity = ParseExpression();
                        ExpectSymbol(Symbol.CloseBracket);
                        var type = ParseType();
                        expr = new ArrayInitializerNode(GetTokensForNode(startIndex), capacity, type);
                        break;
                    }
                    case Symbol.Alloc:
                    {
                        var type = ParseType();
                        Dictionary<string, ExpressionNode> initializers = [];

                        ExpectSymbol(Symbol.OpenBrace);
                        while (!TryExpectSymbol(Symbol.CloseBrace))
                        {
                            var fieldName = ExpectIdentifier();
                            ExpectSymbol(Symbol.Assign);
                            var value = ParseExpression();

                            // Report a repeated field as a parse error instead of letting
                            // Dictionary.Add throw an ArgumentException out of the parser.
                            if (!initializers.TryAdd(fieldName.Value, value))
                            {
                                throw new ParseException(Diagnostic
                                    .Error($"Duplicate initializer for field '{fieldName.Value}'")
                                    .WithHelp("Initialize each field at most once")
                                    .At(fieldName)
                                    .Build());
                            }
                        }

                        expr = new StructInitializerNode(GetTokensForNode(startIndex), type, initializers);
                        break;
                    }
                    default:
                    {
                        throw new ParseException(Diagnostic
                            .Error($"Unexpected symbol '{symbolToken.Symbol}' in expression")
                            .WithHelp("Expected literal, identifier, or '(' to start expression")
                            .At(symbolToken)
                            .Build());
                    }
                }

                break;
            }
            default:
            {
                throw new ParseException(Diagnostic
                    .Error($"Unexpected token '{token.GetType().Name}' in expression")
                    .WithHelp("Expected literal, identifier, or parenthesized expression")
                    .At(token)
                    .Build());
            }
        }

        return ParsePostfixOperators(startIndex, expr);
    }

    /// <summary>
    /// Wraps <paramref name="expr"/> in postfix operations (address-of, dereference, member
    /// access, index access, call) for as long as one follows.
    /// </summary>
    private static ExpressionNode ParsePostfixOperators(int startIndex, ExpressionNode expr)
    {
        while (true)
        {
            if (TryExpectSymbol(Symbol.Ampersand))
            {
                // NOTE(review): unlike the other operators, address-of ends the postfix
                // chain. Kept as in the original; confirm this is intended.
                expr = new AddressOfNode(GetTokensForNode(startIndex), expr);
                break;
            }

            if (TryExpectSymbol(Symbol.Caret))
            {
                expr = new DereferenceNode(GetTokensForNode(startIndex), expr);
                continue;
            }

            if (TryExpectSymbol(Symbol.Period))
            {
                var structMember = ExpectIdentifier().Value;
                expr = new MemberAccessNode(GetTokensForNode(startIndex), expr, structMember);
                continue;
            }

            if (TryExpectSymbol(Symbol.OpenBracket))
            {
                var index = ParseExpression();
                ExpectSymbol(Symbol.CloseBracket);
                expr = new ArrayIndexAccessNode(GetTokensForNode(startIndex), expr, index);
                continue;
            }

            if (TryExpectSymbol(Symbol.OpenParen))
            {
                var parameters = ParseParenthesizedList<ExpressionNode>(
                    () => ParseExpression(),
                    "Missing comma between function arguments",
                    "Add a ',' to separate arguments");

                expr = new FuncCallNode(GetTokensForNode(startIndex), expr, parameters);
                continue;
            }

            break;
        }

        return expr;
    }

    /// <summary>
    /// Parses a brace-delimited block. A statement that fails to parse is reported as a
    /// diagnostic and skipped, so the remaining statements are still parsed.
    /// </summary>
    private static BlockNode ParseBlock()
    {
        var startIndex = _index;
        ExpectSymbol(Symbol.OpenBrace);

        List<StatementNode> statements = [];

        // NOTE(review): reaching end of file before '}' ends the block without a diagnostic.
        while (Peek().HasValue && !TryExpectSymbol(Symbol.CloseBrace))
        {
            try
            {
                statements.Add(ParseStatement());
            }
            catch (ParseException ex)
            {
                _diagnostics.Add(ex.Diagnostic);
                RecoverToNextStatement();
            }
        }

        return new BlockNode(GetTokensForNode(startIndex), statements);
    }

    /// <summary>
    /// Parses a type: a built-in or custom type name (optionally "ns::name"), a '^' pointer,
    /// a func type, or a "[]" array type.
    /// </summary>
    private static NubType ParseType()
    {
        if (TryExpectIdentifier(out var name))
        {
            switch (name.Value)
            {
                case "void":
                    return new NubVoidType();
                case "string":
                    return new NubStringType();
                case "cstring":
                    return new NubCStringType();
            }

            if (NubPrimitiveType.TryParse(name.Value, out var primitiveTypeKind))
            {
                return new NubPrimitiveType(primitiveTypeKind.Value);
            }

            // "ns::Type": the identifier before '::' is the namespace and the one after it
            // the type name, matching how qualified identifiers are parsed in expressions.
            var @namespace = _namespace;
            var typeName = name.Value;
            if (TryExpectSymbol(Symbol.DoubleColon))
            {
                @namespace = name.Value;
                typeName = ExpectIdentifier().Value;
            }

            return new NubCustomType(@namespace, typeName);
        }

        if (TryExpectSymbol(Symbol.Caret))
        {
            var baseType = ParseType();
            return new NubPointerType(baseType);
        }

        if (TryExpectSymbol(Symbol.Func))
        {
            ExpectSymbol(Symbol.OpenParen);
            var parameters = ParseParenthesizedList<NubType>(
                ParseType,
                "Missing comma between func type arguments",
                "Add a ',' to separate arguments");

            var returnType = ParseOptionalReturnType();

            return new NubFuncType(returnType, parameters);
        }

        if (TryExpectSymbol(Symbol.OpenBracket))
        {
            ExpectSymbol(Symbol.CloseBracket);
            var baseType = ParseType();
            return new NubArrayType(baseType);
        }

        if (!Peek().TryGetValue(out var peekToken))
        {
            throw new ParseException(Diagnostic
                .Error("Unexpected end of file while parsing type")
                .WithHelp("Expected a type name")
                .At(_tokens[^1])
                .Build());
        }

        throw new ParseException(Diagnostic
            .Error("Invalid type Syntax")
            .WithHelp("Expected type name, '^' for pointer, or '[]' for array")
            .At(peekToken)
            .Build());
    }

    /// <summary>
    /// Consumes and returns the next token.
    /// </summary>
    /// <exception cref="ParseException">There are no tokens left.</exception>
    private static Token ExpectToken()
    {
        if (!Peek().TryGetValue(out var token))
        {
            throw new ParseException(Diagnostic
                .Error("Unexpected end of file")
                .WithHelp("Expected more tokens to complete the Syntax")
                .At(_tokens[^1])
                .Build());
        }

        Next();
        return token;
    }

    /// <summary>
    /// Consumes the next token and requires it to be a symbol. The token is consumed even
    /// when the check fails.
    /// </summary>
    private static SymbolToken ExpectSymbol()
    {
        var token = ExpectToken();
        if (token is not SymbolToken symbol)
        {
            throw new ParseException(Diagnostic
                .Error($"Expected symbol, but found {token.GetType().Name}")
                .WithHelp("This position requires a symbol like '(', ')', '{', '}', etc.")
                .At(token)
                .Build());
        }

        return symbol;
    }

    /// <summary>
    /// Consumes the next token and requires it to be <paramref name="expectedSymbol"/>.
    /// </summary>
    private static void ExpectSymbol(Symbol expectedSymbol)
    {
        var token = ExpectSymbol();
        if (token.Symbol != expectedSymbol)
        {
            throw new ParseException(Diagnostic
                .Error($"Expected '{expectedSymbol}', but found '{token.Symbol}'")
                .WithHelp($"Insert '{expectedSymbol}' here")
                .At(token)
                .Build());
        }
    }

    /// <summary>
    /// Consumes the next token only if it is <paramref name="symbol"/>.
    /// </summary>
    private static bool TryExpectSymbol(Symbol symbol)
    {
        if (Peek() is { Value: SymbolToken symbolToken } && symbolToken.Symbol == symbol)
        {
            Next();
            return true;
        }

        return false;
    }

    /// <summary>
    /// Consumes the next token only if it is a modifier.
    /// </summary>
    private static bool TryExpectModifier([NotNullWhen(true)] out ModifierToken? modifier)
    {
        if (Peek() is { Value: ModifierToken modifierToken })
        {
            modifier = modifierToken;
            Next();
            return true;
        }

        modifier = null;
        return false;
    }

    /// <summary>
    /// Consumes the next token only if it is an identifier.
    /// </summary>
    private static bool TryExpectIdentifier([NotNullWhen(true)] out IdentifierToken? identifier)
    {
        if (Peek() is { Value: IdentifierToken identifierToken })
        {
            identifier = identifierToken;
            Next();
            return true;
        }

        identifier = null;
        return false;
    }

    /// <summary>
    /// Consumes the next token and requires it to be an identifier. The token is consumed
    /// even when the check fails.
    /// </summary>
    private static IdentifierToken ExpectIdentifier()
    {
        var token = ExpectToken();
        if (token is not IdentifierToken identifier)
        {
            throw new ParseException(Diagnostic
                .Error($"Expected identifier, but found {token.GetType().Name}")
                .WithHelp("Provide a valid identifier name here")
                .At(token)
                .Build());
        }

        return identifier;
    }

    /// <summary>
    /// Skips tokens until one that can start a definition (a modifier or a definition
    /// keyword) is next, or the input ends.
    /// </summary>
    private static void RecoverToNextDefinition()
    {
        while (Peek().HasValue)
        {
            var token = Peek().Value;
            if (token is SymbolToken { Symbol: Symbol.Func or Symbol.Struct or Symbol.Trait or Symbol.Impl } or ModifierToken)
            {
                break;
            }

            Next();
        }
    }

    /// <summary>
    /// Skips tokens until a closing brace, an identifier or a statement keyword is next,
    /// or the input ends.
    /// </summary>
    private static void RecoverToNextStatement()
    {
        while (Peek().TryGetValue(out var token))
        {
            if (token is SymbolToken { Symbol: Symbol.CloseBrace } or IdentifierToken or SymbolToken
                {
                    Symbol: Symbol.Return or Symbol.If or Symbol.While or Symbol.Let or Symbol.Break or Symbol.Continue
                })
            {
                break;
            }

            Next();
        }
    }

    /// <summary>
    /// Returns the token <paramref name="offset"/> positions ahead of the current one
    /// without consuming it, or an empty optional past the end of the input.
    /// </summary>
    private static Optional<Token> Peek(int offset = 0)
    {
        var peekIndex = _index + offset;
        if (peekIndex < _tokens.Count)
        {
            return _tokens[peekIndex];
        }

        return Optional<Token>.Empty();
    }

    /// <summary>
    /// Advances to the next token.
    /// </summary>
    private static void Next()
    {
        _index++;
    }

    /// <summary>
    /// Returns the tokens from <paramref name="startIndex"/> up to, but not including, the
    /// current position.
    /// </summary>
    private static IEnumerable<Token> GetTokensForNode(int startIndex)
    {
        // Clamp to Count, not Count - 1: a node that ends at the end of the input must
        // still include the final token.
        var endIndex = Math.Min(_index, _tokens.Count);
        return _tokens.Skip(startIndex).Take(endIndex - startIndex);
    }
}

/// <summary>
/// Thrown when the parser cannot continue; carries the diagnostic describing the failure.
/// </summary>
public class ParseException : Exception
{
    /// <summary>
    /// The diagnostic describing why parsing failed.
    /// </summary>
    public Diagnostic Diagnostic { get; }

    public ParseException(Diagnostic diagnostic) : base(diagnostic.Message)
    {
        Diagnostic = diagnostic;
    }
}