using System.Diagnostics.CodeAnalysis;
using NubLang.Diagnostics;

namespace NubLang.Syntax;

/// <summary>
/// Recursive-descent parser that turns a list of tokens into a <see cref="SyntaxTree"/>.
/// Every <see cref="ParseException"/> raised while parsing is caught and recorded in
/// <see cref="Diagnostics"/>; the parser then skips ahead and keeps going.
/// </summary>
public sealed class Parser
{
    private List<Token> _tokens = [];
    private int _tokenIndex;
    private string _moduleName = string.Empty;

    /// <summary>The token at the cursor, or <c>null</c> once the cursor is past the last token.</summary>
    private Token? CurrentToken => _tokenIndex < _tokens.Count ? _tokens[_tokenIndex] : null;

    private bool HasToken => CurrentToken != null;

    /// <summary>Diagnostics collected by the most recent call to <see cref="Parse"/>.</summary>
    public List<Diagnostic> Diagnostics { get; } = [];

    /// <summary>
    /// Parses a whole file: zero or more <c>import "..."</c> lines, one <c>module "..."</c> line,
    /// then any number of function and struct definitions.
    /// </summary>
    /// <param name="tokens">Tokens of the file, in source order. The list is stored, not copied.</param>
    /// <returns>The definitions, module name and imports that could be parsed.</returns>
    public SyntaxTree Parse(List<Token> tokens)
    {
        Diagnostics.Clear();
        _tokens = tokens;
        _tokenIndex = 0;
        _moduleName = string.Empty;

        var imports = new List<string>();

        try
        {
            while (TryExpectSymbol(Symbol.Import))
            {
                imports.Add(ExpectStringLiteral().Value);
            }

            ExpectSymbol(Symbol.Module);
            _moduleName = ExpectStringLiteral().Value;
        }
        catch (ParseException e)
        {
            Diagnostics.Add(e.Diagnostic);

            // NOTE(review): recovery stops on 'module'/'import', but the definition loop below
            // cannot consume those symbols, so a second diagnostic is produced for them.
            // Confirm whether the header should be retried instead.
            SkipUntilSymbol(Symbol.Module, Symbol.Import);
        }

        var definitions = new List<DefinitionSyntax>();

        while (HasToken)
        {
            try
            {
                definitions.Add(ParseDefinition());
            }
            catch (ParseException e)
            {
                Diagnostics.Add(e.Diagnostic);

                // 'export' is part of the recovery set so that the modifier of the next
                // definition is not thrown away while skipping. Every attempt in
                // ParseDefinition consumes at least one token, so this cannot loop forever.
                SkipUntilSymbol(Symbol.Export, Symbol.Extern, Symbol.Func, Symbol.Struct);
            }
        }

        return new SyntaxTree(definitions, _moduleName, imports);
    }

    /// <summary>
    /// Parses one top-level definition: <c>[export] extern "sym" func ...</c>,
    /// <c>[export] func ...</c> or <c>[export] struct ...</c>.
    /// </summary>
    private DefinitionSyntax ParseDefinition()
    {
        var startIndex = _tokenIndex;
        var exported = TryExpectSymbol(Symbol.Export);

        if (TryExpectSymbol(Symbol.Extern))
        {
            var externSymbol = ExpectStringLiteral();
            ExpectSymbol(Symbol.Func);
            return ParseFunc(startIndex, exported, externSymbol.Value);
        }

        var keyword = ExpectSymbol();
        return keyword.Symbol switch
        {
            Symbol.Func => ParseFunc(startIndex, exported, null),
            Symbol.Struct => ParseStruct(startIndex, exported),
            _ => throw new ParseException(Diagnostic
                .Error($"Expected 'func' or 'struct' but found '{keyword.Symbol}'")
                .WithHelp("Valid definition keywords are 'func' and 'struct'")
                .At(keyword)
                .Build())
        };
    }

    /// <summary>Advances the cursor until it sits on one of <paramref name="stopSymbols"/> or runs out of tokens.</summary>
    private void SkipUntilSymbol(params Symbol[] stopSymbols)
    {
        while (CurrentToken is { } token)
        {
            if (token is SymbolToken symbolToken && Array.IndexOf(stopSymbols, symbolToken.Symbol) >= 0)
            {
                return;
            }

            Next();
        }
    }

    /// <summary>Parses a single <c>name: type</c> function parameter.</summary>
    private FuncParameterSyntax ParseFuncParameter()
    {
        var startIndex = _tokenIndex;
        var name = ExpectIdentifier();
        ExpectSymbol(Symbol.Colon);
        var type = ParseType();

        return new FuncParameterSyntax(GetTokens(startIndex), name.Value, type);
    }

    /// <summary>
    /// Parses a function after its <c>func</c> keyword: name, parameter list, optional
    /// <c>: returnType</c> (void when omitted) and an optional <c>{ ... }</c> body.
    /// </summary>
    /// <param name="startIndex">Index of the first token of the definition, including modifiers.</param>
    /// <param name="exported">Whether the definition was preceded by <c>export</c>.</param>
    /// <param name="externSymbol">The extern symbol name, or <c>null</c> for a regular function.</param>
    private FuncSyntax ParseFunc(int startIndex, bool exported, string? externSymbol)
    {
        var name = ExpectIdentifier();
        List<FuncParameterSyntax> parameters = [];

        ExpectSymbol(Symbol.OpenParen);

        while (!TryExpectSymbol(Symbol.CloseParen))
        {
            parameters.Add(ParseFuncParameter());

            // Anything other than a comma after a parameter must be the closing paren.
            if (!TryExpectSymbol(Symbol.Comma))
            {
                ExpectSymbol(Symbol.CloseParen);
                break;
            }
        }

        var returnType = TryExpectSymbol(Symbol.Colon) ? ParseType() : new VoidTypeSyntax([]);

        var prototype = new FuncPrototypeSyntax(GetTokens(startIndex), name.Value, exported, externSymbol, parameters, returnType);

        BlockSyntax? body = null;
        var bodyStartIndex = _tokenIndex;
        if (TryExpectSymbol(Symbol.OpenBrace))
        {
            body = ParseBlock(bodyStartIndex);
        }

        return new FuncSyntax(GetTokens(startIndex), prototype, body);
    }

    /// <summary>
    /// Parses a struct after its <c>struct</c> keyword: name and a braced list of
    /// <c>field: type [= defaultValue]</c> members.
    /// </summary>
    private StructSyntax ParseStruct(int startIndex, bool exported)
    {
        var name = ExpectIdentifier();

        ExpectSymbol(Symbol.OpenBrace);

        List<StructFieldSyntax> fields = [];

        while (!TryExpectSymbol(Symbol.CloseBrace))
        {
            var memberStartIndex = _tokenIndex;

            var fieldName = ExpectIdentifier().Value;
            ExpectSymbol(Symbol.Colon);
            var fieldType = ParseType();

            ExpressionSyntax? fieldValue = null;
            if (TryExpectSymbol(Symbol.Assign))
            {
                fieldValue = ParseExpression();
            }

            fields.Add(new StructFieldSyntax(GetTokens(memberStartIndex), fieldName, fieldType, fieldValue));
        }

        return new StructSyntax(GetTokens(startIndex), name.Value, exported, fields);
    }

    /// <summary>
    /// Parses one statement. Statements introduced by a keyword symbol are dispatched to their
    /// own parser; everything else is an expression, optionally followed by <c>= value</c>.
    /// </summary>
    private StatementSyntax ParseStatement()
    {
        var startIndex = _tokenIndex;

        if (TryExpectSymbol(out var symbol))
        {
            switch (symbol)
            {
                case Symbol.OpenBrace:
                    return ParseBlock(startIndex);
                case Symbol.Return:
                    return ParseReturn(startIndex);
                case Symbol.If:
                    return ParseIf(startIndex);
                case Symbol.While:
                    return ParseWhile(startIndex);
                case Symbol.Let:
                    return ParseVariableDeclaration(startIndex);
                case Symbol.Defer:
                    return ParseDefer(startIndex);
                case Symbol.Break:
                    return new BreakSyntax(GetTokens(startIndex));
                case Symbol.Continue:
                    return new ContinueSyntax(GetTokens(startIndex));
            }

            // The symbol is not a statement keyword, so it belongs to the expression
            // (e.g. '(', '-', '!', '[', 'struct', '@'). Put it back before parsing the
            // expression; otherwise the expression would be parsed without its first token.
            _tokenIndex = startIndex;
        }

        var expr = ParseExpression();

        if (TryExpectSymbol(Symbol.Assign))
        {
            var value = ParseExpression();
            return new AssignmentSyntax(GetTokens(startIndex), expr, value);
        }

        return new StatementExpressionSyntax(GetTokens(startIndex), expr);
    }

    /// <summary>Parses <c>name [: type] [= value]</c> after the <c>let</c> keyword.</summary>
    private VariableDeclarationSyntax ParseVariableDeclaration(int startIndex)
    {
        var name = ExpectIdentifier().Value;

        TypeSyntax? explicitType = null;
        if (TryExpectSymbol(Symbol.Colon))
        {
            explicitType = ParseType();
        }

        ExpressionSyntax? assignment = null;
        if (TryExpectSymbol(Symbol.Assign))
        {
            assignment = ParseExpression();
        }

        return new VariableDeclarationSyntax(GetTokens(startIndex), name, explicitType, assignment);
    }

    /// <summary>Parses the statement that follows the <c>defer</c> keyword.</summary>
    private DeferSyntax ParseDefer(int startIndex)
    {
        var statement = ParseStatement();
        return new DeferSyntax(GetTokens(startIndex), statement);
    }

    /// <summary>
    /// Parses what follows the <c>return</c> keyword: either a <c>Symbol.Semi</c> token
    /// (no value) or the returned expression.
    /// </summary>
    private ReturnSyntax ParseReturn(int startIndex)
    {
        ExpressionSyntax? value = null;

        if (!TryExpectSymbol(Symbol.Semi))
        {
            value = ParseExpression();
        }

        return new ReturnSyntax(GetTokens(startIndex), value);
    }

    /// <summary>
    /// Parses <c>condition { ... }</c> after the <c>if</c> keyword, followed by an optional
    /// <c>else { ... }</c> or <c>else if ...</c> chain.
    /// </summary>
    private IfSyntax ParseIf(int startIndex)
    {
        var condition = ParseExpression();
        var body = ParseBlock();

        Variant<IfSyntax, BlockSyntax>? elseStatement = null;

        // Captured before 'else' so a chained 'else if' node starts at its 'else' token.
        var elseStartIndex = _tokenIndex;
        if (TryExpectSymbol(Symbol.Else))
        {
            if (TryExpectSymbol(Symbol.If))
            {
                elseStatement = (Variant<IfSyntax, BlockSyntax>)ParseIf(elseStartIndex);
            }
            else
            {
                elseStatement = (Variant<IfSyntax, BlockSyntax>)ParseBlock();
            }
        }

        return new IfSyntax(GetTokens(startIndex), condition, body, elseStatement);
    }

    /// <summary>Parses <c>condition { ... }</c> after the <c>while</c> keyword.</summary>
    private WhileSyntax ParseWhile(int startIndex)
    {
        var condition = ParseExpression();
        var body = ParseBlock();
        return new WhileSyntax(GetTokens(startIndex), condition, body);
    }

    /// <summary>
    /// Parses a binary expression by precedence climbing. Only operators whose precedence is at
    /// least <paramref name="precedence"/> are consumed; the right operand is parsed one level
    /// higher, which makes operators of equal precedence left-associative.
    /// </summary>
    private ExpressionSyntax ParseExpression(int precedence = 0)
    {
        var startIndex = _tokenIndex;
        var left = ParsePrimaryExpression();

        // NOTE(review): ParsePostfixOperators consumes '&' and '^' after every primary
        // expression, so the BitwiseAnd / BitwiseXor mappings below look unreachable - confirm.
        while (CurrentToken is SymbolToken symbolToken
               && TryGetBinaryOperator(symbolToken.Symbol, out var op)
               && GetBinaryOperatorPrecedence(op.Value) >= precedence)
        {
            Next();
            var right = ParseExpression(GetBinaryOperatorPrecedence(op.Value) + 1);
            left = new BinaryExpressionSyntax(GetTokens(startIndex), left, op.Value, right);
        }

        return left;
    }

    /// <summary>Returns the binding strength of a binary operator; higher binds tighter.</summary>
    /// <exception cref="ArgumentOutOfRangeException">The operator has no precedence assigned.</exception>
    private static int GetBinaryOperatorPrecedence(BinaryOperatorSyntax operatorSyntax)
    {
        return operatorSyntax switch
        {
            BinaryOperatorSyntax.Multiply => 10,
            BinaryOperatorSyntax.Divide => 10,
            BinaryOperatorSyntax.Modulo => 10,

            BinaryOperatorSyntax.Plus => 9,
            BinaryOperatorSyntax.Minus => 9,

            BinaryOperatorSyntax.LeftShift => 8,
            BinaryOperatorSyntax.RightShift => 8,

            BinaryOperatorSyntax.GreaterThan => 7,
            BinaryOperatorSyntax.GreaterThanOrEqual => 7,
            BinaryOperatorSyntax.LessThan => 7,
            BinaryOperatorSyntax.LessThanOrEqual => 7,
            BinaryOperatorSyntax.Equal => 7,
            BinaryOperatorSyntax.NotEqual => 7,

            BinaryOperatorSyntax.BitwiseAnd => 6,
            BinaryOperatorSyntax.BitwiseXor => 5,
            BinaryOperatorSyntax.BitwiseOr => 4,

            BinaryOperatorSyntax.LogicalAnd => 3,
            BinaryOperatorSyntax.LogicalOr => 2,

            _ => throw new ArgumentOutOfRangeException(nameof(operatorSyntax), operatorSyntax, null)
        };
    }

    /// <summary>Maps a symbol to the binary operator it denotes, if any.</summary>
    /// <returns><c>true</c> when <paramref name="symbol"/> is a binary operator.</returns>
    private bool TryGetBinaryOperator(Symbol symbol, [NotNullWhen(true)] out BinaryOperatorSyntax? binaryExpressionOperator)
    {
        binaryExpressionOperator = symbol switch
        {
            Symbol.Equal => BinaryOperatorSyntax.Equal,
            Symbol.NotEqual => BinaryOperatorSyntax.NotEqual,
            Symbol.LessThan => BinaryOperatorSyntax.LessThan,
            Symbol.LessThanOrEqual => BinaryOperatorSyntax.LessThanOrEqual,
            Symbol.GreaterThan => BinaryOperatorSyntax.GreaterThan,
            Symbol.GreaterThanOrEqual => BinaryOperatorSyntax.GreaterThanOrEqual,
            Symbol.And => BinaryOperatorSyntax.LogicalAnd,
            Symbol.Or => BinaryOperatorSyntax.LogicalOr,
            Symbol.Plus => BinaryOperatorSyntax.Plus,
            Symbol.Minus => BinaryOperatorSyntax.Minus,
            Symbol.Star => BinaryOperatorSyntax.Multiply,
            Symbol.ForwardSlash => BinaryOperatorSyntax.Divide,
            Symbol.Percent => BinaryOperatorSyntax.Modulo,
            Symbol.LeftShift => BinaryOperatorSyntax.LeftShift,
            Symbol.RightShift => BinaryOperatorSyntax.RightShift,
            Symbol.Ampersand => BinaryOperatorSyntax.BitwiseAnd,
            Symbol.Pipe => BinaryOperatorSyntax.BitwiseOr,
            Symbol.Caret => BinaryOperatorSyntax.BitwiseXor,
            _ => null
        };

        return binaryExpressionOperator != null;
    }

    /// <summary>
    /// Parses a literal, identifier, parenthesized expression, unary expression, array or struct
    /// initializer, or builtin call, followed by any postfix operators.
    /// </summary>
    private ExpressionSyntax ParsePrimaryExpression()
    {
        var startIndex = _tokenIndex;
        var token = ExpectToken();

        ExpressionSyntax expr = token switch
        {
            BoolLiteralToken boolLiteral => new BoolLiteralSyntax(GetTokens(startIndex), boolLiteral.Value),
            StringLiteralToken stringLiteral => new StringLiteralSyntax(GetTokens(startIndex), stringLiteral.Value),
            FloatLiteralToken floatLiteral => new FloatLiteralSyntax(GetTokens(startIndex), floatLiteral.Value),
            IntLiteralToken intLiteral => new IntLiteralSyntax(GetTokens(startIndex), intLiteral.Value, intLiteral.Base),
            IdentifierToken identifier => ParseIdentifier(startIndex, identifier),
            SymbolToken symbolToken => symbolToken.Symbol switch
            {
                Symbol.OpenParen => ParseParenthesizedExpression(),
                Symbol.Minus => ParseUnaryExpression(startIndex, UnaryOperatorSyntax.Negate),
                Symbol.Bang => ParseUnaryExpression(startIndex, UnaryOperatorSyntax.Invert),
                Symbol.OpenBracket => ParseArrayInitializer(startIndex),
                Symbol.OpenBrace => ParseAnonymousStructInitializer(startIndex),
                Symbol.Struct => ParseStructInitializer(startIndex),
                Symbol.At => ParseBuiltinFunction(startIndex),
                _ => throw new ParseException(Diagnostic
                    .Error($"Unexpected symbol '{symbolToken.Symbol}' in expression")
                    .WithHelp("Expected '(', '-', '!', '[' or '{'")
                    .At(symbolToken)
                    .Build())
            },
            _ => throw new ParseException(Diagnostic
                .Error($"Unexpected token '{token.GetType().Name}' in expression")
                .WithHelp("Expected literal, identifier, or parenthesized expression")
                .At(token)
                .Build())
        };

        return ParsePostfixOperators(startIndex, expr);
    }

    /// <summary>
    /// Parses the operand of a prefix operator whose symbol has already been consumed.
    /// The operand is parsed before the tokens are captured so the node covers
    /// both the operator and its operand.
    /// </summary>
    private UnaryExpressionSyntax ParseUnaryExpression(int startIndex, UnaryOperatorSyntax unaryOperator)
    {
        var operand = ParsePrimaryExpression();
        return new UnaryExpressionSyntax(GetTokens(startIndex), unaryOperator, operand);
    }

    /// <summary>
    /// Parses a struct initializer without a type, after its opening brace has been consumed.
    /// The body is parsed before the tokens are captured so the node covers the whole initializer.
    /// </summary>
    private StructInitializerSyntax ParseAnonymousStructInitializer(int startIndex)
    {
        var initializers = ParseStructInitializerBody();
        return new StructInitializerSyntax(GetTokens(startIndex), null, initializers);
    }

    /// <summary>
    /// Parses a builtin call after the <c>@</c> symbol: <c>size(type)</c>,
    /// <c>interpret(type, expr)</c> or <c>floatToInt(type, expr)</c>.
    /// </summary>
    private ExpressionSyntax ParseBuiltinFunction(int startIndex)
    {
        var name = ExpectIdentifier();
        ExpectSymbol(Symbol.OpenParen);

        switch (name.Value)
        {
            case "size":
            {
                var type = ParseType();
                ExpectSymbol(Symbol.CloseParen);
                return new SizeBuiltinSyntax(GetTokens(startIndex), type);
            }
            case "interpret":
            {
                var type = ParseType();
                ExpectSymbol(Symbol.Comma);
                var expression = ParseExpression();
                ExpectSymbol(Symbol.CloseParen);
                return new InterpretBuiltinSyntax(GetTokens(startIndex), type, expression);
            }
            case "floatToInt":
            {
                var type = ParseType();
                ExpectSymbol(Symbol.Comma);
                var expression = ParseExpression();
                ExpectSymbol(Symbol.CloseParen);
                return new FloatToIntBuiltinSyntax(GetTokens(startIndex), type, expression);
            }
            default:
            {
                throw new ParseException(Diagnostic.Error($"Unknown builtin {name.Value}").At(name).Build());
            }
        }
    }

    /// <summary>
    /// Turns an already-consumed identifier into a local identifier, or into a
    /// <c>module::name</c> identifier when it is followed by <c>::</c>.
    /// </summary>
    private ExpressionSyntax ParseIdentifier(int startIndex, IdentifierToken identifier)
    {
        if (TryExpectSymbol(Symbol.DoubleColon))
        {
            var name = ExpectIdentifier();
            return new ModuleIdentifierSyntax(GetTokens(startIndex), identifier.Value, name.Value);
        }

        return new LocalIdentifierSyntax(GetTokens(startIndex), identifier.Value);
    }

    /// <summary>Parses <c>expr )</c> after an opening paren and returns the inner expression itself.</summary>
    private ExpressionSyntax ParseParenthesizedExpression()
    {
        var expression = ParseExpression();
        ExpectSymbol(Symbol.CloseParen);
        return expression;
    }

    /// <summary>
    /// Wraps <paramref name="expr"/> in any postfix operators that follow it: <c>&amp;</c>
    /// (address-of), <c>^</c> (dereference), <c>.member</c>, <c>[index]</c> and <c>(args)</c>.
    /// </summary>
    /// <param name="startIndex">Index of the first token of <paramref name="expr"/>, so each postfix node also covers its operand.</param>
    /// <param name="expr">The operand parsed so far.</param>
    private ExpressionSyntax ParsePostfixOperators(int startIndex, ExpressionSyntax expr)
    {
        while (HasToken)
        {
            if (TryExpectSymbol(Symbol.Ampersand))
            {
                expr = new AddressOfSyntax(GetTokens(startIndex), expr);
                continue;
            }

            if (TryExpectSymbol(Symbol.Caret))
            {
                expr = new DereferenceSyntax(GetTokens(startIndex), expr);
                continue;
            }

            if (TryExpectSymbol(Symbol.Period))
            {
                var member = ExpectIdentifier().Value;
                expr = new MemberAccessSyntax(GetTokens(startIndex), expr, member);
                continue;
            }

            if (TryExpectSymbol(Symbol.OpenBracket))
            {
                var index = ParseExpression();
                ExpectSymbol(Symbol.CloseBracket);
                expr = new ArrayIndexAccessSyntax(GetTokens(startIndex), expr, index);
                continue;
            }

            if (TryExpectSymbol(Symbol.OpenParen))
            {
                var parameters = new List<ExpressionSyntax>();

                while (!TryExpectSymbol(Symbol.CloseParen))
                {
                    parameters.Add(ParseExpression());

                    if (!TryExpectSymbol(Symbol.Comma))
                    {
                        ExpectSymbol(Symbol.CloseParen);
                        break;
                    }
                }

                expr = new FuncCallSyntax(GetTokens(startIndex), expr, parameters);
                continue;
            }

            break;
        }

        return expr;
    }

    /// <summary>Parses <c>capacity ] elementType</c> after an opening bracket in expression position.</summary>
    private ExpressionSyntax ParseArrayInitializer(int startIndex)
    {
        var capacity = ParseExpression();
        ExpectSymbol(Symbol.CloseBracket);
        var type = ParseType();

        return new ArrayInitializerSyntax(GetTokens(startIndex), capacity, type);
    }

    /// <summary>Parses <c>[type] { field = value ... }</c> after the <c>struct</c> keyword in expression position.</summary>
    private StructInitializerSyntax ParseStructInitializer(int startIndex)
    {
        TypeSyntax? type = null;

        if (!TryExpectSymbol(Symbol.OpenBrace))
        {
            type = ParseType();
            ExpectSymbol(Symbol.OpenBrace);
        }

        var initializers = ParseStructInitializerBody();

        return new StructInitializerSyntax(GetTokens(startIndex), type, initializers);
    }

    /// <summary>
    /// Parses <c>field = value</c> pairs up to and including the closing brace.
    /// A field that is initialized more than once is reported as a diagnostic and the
    /// first value is kept.
    /// </summary>
    private Dictionary<string, ExpressionSyntax> ParseStructInitializerBody()
    {
        Dictionary<string, ExpressionSyntax> initializers = [];

        while (!TryExpectSymbol(Symbol.CloseBrace))
        {
            var nameToken = ExpectIdentifier();
            ExpectSymbol(Symbol.Assign);
            var value = ParseExpression();

            // Dictionary.Add would throw ArgumentException on a repeated field name, which
            // none of the parser's ParseException handlers would catch.
            if (!initializers.TryAdd(nameToken.Value, value))
            {
                Diagnostics.Add(Diagnostic
                    .Error($"Field '{nameToken.Value}' is initialized more than once")
                    .WithHelp("Remove the duplicate initializer")
                    .At(nameToken)
                    .Build());
            }
        }

        return initializers;
    }

    /// <summary>Parses a block including its opening brace.</summary>
    private BlockSyntax ParseBlock()
    {
        var startIndex = _tokenIndex;
        ExpectSymbol(Symbol.OpenBrace);
        return ParseBlock(startIndex);
    }

    /// <summary>
    /// Parses statements up to and including the closing brace; the opening brace has already
    /// been consumed. A statement that fails to parse is reported, one token is skipped, and
    /// parsing resumes; the block ends early if the tokens run out.
    /// </summary>
    /// <param name="startIndex">Index of the block's opening brace.</param>
    private BlockSyntax ParseBlock(int startIndex)
    {
        List<StatementSyntax> statements = [];

        while (!TryExpectSymbol(Symbol.CloseBrace))
        {
            try
            {
                statements.Add(ParseStatement());
            }
            catch (ParseException ex)
            {
                Diagnostics.Add(ex.Diagnostic);

                if (!HasToken)
                {
                    break;
                }

                // Skip one token so a statement that failed without consuming anything
                // cannot be retried forever.
                Next();
            }
        }

        return new BlockSyntax(GetTokens(startIndex), statements);
    }

    /// <summary>
    /// Parses a type: sized primitives (<c>u8..u64</c>, <c>i8..i64</c>, <c>f32</c>, <c>f64</c>),
    /// <c>void</c>, <c>string</c>, <c>cstring</c>, <c>bool</c>, custom types (optionally
    /// <c>module::name</c>), pointers (<c>^T</c>), function types (<c>func(T, ...): R</c>) and
    /// the bracketed forms <c>[N]T</c>, <c>[?]T</c> and <c>[]T</c>.
    /// </summary>
    private TypeSyntax ParseType()
    {
        var startIndex = _tokenIndex;

        if (TryExpectIdentifier(out var name))
        {
            if (TryParseSizedTypeName(name.Value, 'u', out var size))
            {
                if (size is not 8 and not 16 and not 32 and not 64)
                {
                    throw new ParseException(Diagnostic
                        .Error("Arbitrary uint size is not supported")
                        .WithHelp("Use u8, u16, u32 or u64")
                        .At(name)
                        .Build());
                }

                return new IntTypeSyntax(GetTokens(startIndex), false, size);
            }

            if (TryParseSizedTypeName(name.Value, 'i', out size))
            {
                if (size is not 8 and not 16 and not 32 and not 64)
                {
                    throw new ParseException(Diagnostic
                        .Error("Arbitrary int size is not supported")
                        .WithHelp("Use i8, i16, i32 or i64")
                        .At(name)
                        .Build());
                }

                return new IntTypeSyntax(GetTokens(startIndex), true, size);
            }

            if (TryParseSizedTypeName(name.Value, 'f', out size))
            {
                if (size is not 32 and not 64)
                {
                    throw new ParseException(Diagnostic
                        .Error("Arbitrary float size is not supported")
                        .WithHelp("Use f32 or f64")
                        .At(name)
                        .Build());
                }

                return new FloatTypeSyntax(GetTokens(startIndex), size);
            }

            switch (name.Value)
            {
                case "void":
                    return new VoidTypeSyntax(GetTokens(startIndex));
                case "string":
                    return new StringTypeSyntax(GetTokens(startIndex));
                case "cstring":
                    return new CStringTypeSyntax(GetTokens(startIndex));
                case "bool":
                    return new BoolTypeSyntax(GetTokens(startIndex));
                default:
                {
                    // An unqualified custom type belongs to the module being parsed.
                    var module = _moduleName;

                    if (TryExpectSymbol(Symbol.DoubleColon))
                    {
                        var customTypeName = ExpectIdentifier();
                        module = name.Value;
                        name = customTypeName;
                    }

                    return new CustomTypeSyntax(GetTokens(startIndex), module, name.Value);
                }
            }
        }

        if (TryExpectSymbol(Symbol.Caret))
        {
            var baseType = ParseType();
            return new PointerTypeSyntax(GetTokens(startIndex), baseType);
        }

        if (TryExpectSymbol(Symbol.Func))
        {
            ExpectSymbol(Symbol.OpenParen);

            List<TypeSyntax> parameters = [];
            while (!TryExpectSymbol(Symbol.CloseParen))
            {
                parameters.Add(ParseType());

                if (!TryExpectSymbol(Symbol.Comma))
                {
                    ExpectSymbol(Symbol.CloseParen);
                    break;
                }
            }

            var returnType = TryExpectSymbol(Symbol.Colon) ? ParseType() : new VoidTypeSyntax([]);

            return new FuncTypeSyntax(GetTokens(startIndex), parameters, returnType);
        }

        if (TryExpectSymbol(Symbol.OpenBracket))
        {
            if (TryExpectIntLiteral(out var intLiteral))
            {
                ExpectSymbol(Symbol.CloseBracket);
                var baseType = ParseType();

                long length;
                try
                {
                    length = Convert.ToInt64(intLiteral.Value, intLiteral.Base);
                }
                catch (Exception e) when (e is FormatException or OverflowException or ArgumentException)
                {
                    // Report an unconvertible size as a diagnostic instead of letting a
                    // non-ParseException escape the parser.
                    throw new ParseException(Diagnostic
                        .Error("Invalid array size")
                        .WithHelp("The array size must be an integer literal that fits in 64 bits")
                        .At(intLiteral)
                        .Build());
                }

                return new ConstArrayTypeSyntax(GetTokens(startIndex), baseType, length);
            }
            else if (TryExpectSymbol(Symbol.QuestionMark))
            {
                ExpectSymbol(Symbol.CloseBracket);
                var baseType = ParseType();
                return new ArrayTypeSyntax(GetTokens(startIndex), baseType);
            }
            else
            {
                ExpectSymbol(Symbol.CloseBracket);
                var baseType = ParseType();
                return new SliceTypeSyntax(GetTokens(startIndex), baseType);
            }
        }

        throw new ParseException(Diagnostic
            .Error("Invalid type syntax")
            .WithHelp("Expected type name, '^' for pointer, or '[]' for array")
            .At(CurrentToken)
            .Build());
    }

    /// <summary>
    /// Checks whether <paramref name="name"/> is <paramref name="prefix"/> followed by an integer
    /// (for example <c>u32</c>) and, if so, returns that integer in <paramref name="size"/>.
    /// </summary>
    private static bool TryParseSizedTypeName(string name, char prefix, out int size)
    {
        size = 0;
        return name.Length > 0 && name[0] == prefix && int.TryParse(name[1..], out size);
    }

    /// <summary>Consumes and returns the current token.</summary>
    /// <exception cref="ParseException">There are no tokens left.</exception>
    private Token ExpectToken()
    {
        var token = CurrentToken;
        if (token is null)
        {
            // Point at the last token when there is one. The token list may be empty,
            // in which case indexing it with [^1] would throw ArgumentOutOfRangeException.
            // NOTE(review): At() already receives a nullable token in ParseType, so passing
            // null here is assumed to be supported - confirm.
            var lastToken = _tokens.Count > 0 ? _tokens[^1] : null;

            throw new ParseException(Diagnostic
                .Error("Unexpected end of file")
                .WithHelp("Expected more tokens to complete the syntax")
                .At(lastToken)
                .Build());
        }

        Next();
        return token;
    }

    /// <summary>Consumes the current token and requires it to be a symbol.</summary>
    /// <exception cref="ParseException">The token is not a symbol, or there are no tokens left.</exception>
    private SymbolToken ExpectSymbol()
    {
        var token = ExpectToken();
        if (token is not SymbolToken symbol)
        {
            throw new ParseException(Diagnostic
                .Error($"Expected symbol, but found {token.GetType().Name}")
                .WithHelp("This position requires a symbol like '(', ')', '{', '}', etc.")
                .At(token)
                .Build());
        }

        return symbol;
    }

    /// <summary>Consumes the current token and requires it to be <paramref name="expectedSymbol"/>.</summary>
    /// <exception cref="ParseException">The token is a different symbol, not a symbol, or missing.</exception>
    private void ExpectSymbol(Symbol expectedSymbol)
    {
        var token = ExpectSymbol();
        if (token.Symbol != expectedSymbol)
        {
            throw new ParseException(Diagnostic
                .Error($"Expected '{expectedSymbol}', but found '{token.Symbol}'")
                .WithHelp($"Insert '{expectedSymbol}' here")
                .At(token)
                .Build());
        }
    }

    /// <summary>Consumes the current token if it is any symbol and returns which one it was.</summary>
    private bool TryExpectSymbol(out Symbol symbol)
    {
        if (CurrentToken is SymbolToken symbolToken)
        {
            Next();
            symbol = symbolToken.Symbol;
            return true;
        }

        symbol = default;
        return false;
    }

    /// <summary>Consumes the current token only if it is exactly <paramref name="symbol"/>.</summary>
    private bool TryExpectSymbol(Symbol symbol)
    {
        if (CurrentToken is SymbolToken symbolToken && symbolToken.Symbol == symbol)
        {
            Next();
            return true;
        }

        return false;
    }

    /// <summary>Consumes the current token only if it is an identifier.</summary>
    private bool TryExpectIdentifier([NotNullWhen(true)] out IdentifierToken? identifier)
    {
        if (CurrentToken is IdentifierToken identifierToken)
        {
            identifier = identifierToken;
            Next();
            return true;
        }

        identifier = null;
        return false;
    }

    /// <summary>Consumes the current token and requires it to be an identifier.</summary>
    /// <exception cref="ParseException">The token is not an identifier, or there are no tokens left.</exception>
    private IdentifierToken ExpectIdentifier()
    {
        var token = ExpectToken();
        if (token is not IdentifierToken identifier)
        {
            throw new ParseException(Diagnostic
                .Error($"Expected identifier, but found {token.GetType().Name}")
                .WithHelp("Provide a valid identifier name here")
                .At(token)
                .Build());
        }

        return identifier;
    }

    /// <summary>Consumes the current token only if it is an integer literal.</summary>
    private bool TryExpectIntLiteral([NotNullWhen(true)] out IntLiteralToken? intLiteral)
    {
        if (CurrentToken is IntLiteralToken token)
        {
            intLiteral = token;
            Next();
            return true;
        }

        intLiteral = null;
        return false;
    }

    /// <summary>Consumes the current token and requires it to be a string literal.</summary>
    /// <exception cref="ParseException">The token is not a string literal, or there are no tokens left.</exception>
    private StringLiteralToken ExpectStringLiteral()
    {
        var token = ExpectToken();
        if (token is not StringLiteralToken stringLiteral)
        {
            throw new ParseException(Diagnostic
                .Error($"Expected string literal, but found {token.GetType().Name}")
                .WithHelp("Provide a valid string literal")
                .At(token)
                .Build());
        }

        return stringLiteral;
    }

    /// <summary>Moves the cursor to the next token.</summary>
    private void Next()
    {
        _tokenIndex++;
    }

    /// <summary>Returns a copy of the tokens from <paramref name="tokenStartIndex"/> up to, but not including, the cursor.</summary>
    private List<Token> GetTokens(int tokenStartIndex)
    {
        return _tokens.Skip(tokenStartIndex).Take(_tokenIndex - tokenStartIndex).ToList();
    }
}

/// <summary>The result of parsing one file.</summary>
/// <param name="Definitions">Top-level function and struct definitions.</param>
/// <param name="ModuleName">Name from the <c>module</c> declaration, or empty if it could not be parsed.</param>
/// <param name="Imports">Module names listed by <c>import</c> declarations.</param>
public record SyntaxTree(List<DefinitionSyntax> Definitions, string ModuleName, List<string> Imports);

/// <summary>Thrown inside the parser to abort the current construct; carries the diagnostic to report.</summary>
public class ParseException : Exception
{
    public Diagnostic Diagnostic { get; }

    public ParseException(Diagnostic diagnostic) : base(diagnostic.Message)
    {
        Diagnostic = diagnostic;
    }
}