Files
nub-lang/src/compiler/NubLang/Parsing/Parser.cs
nub31 d993581361 ...
2025-07-22 23:20:56 +02:00

707 lines
22 KiB
C#

using System.Diagnostics.CodeAnalysis;
using NubLang.Common;
using NubLang.Diagnostics;
using NubLang.Parsing.Syntax;
using NubLang.Tokenization;
namespace NubLang.Parsing;
public sealed class Parser
{
// Enumerator over the token stream being parsed; assigned at the start of Parse.
private IEnumerator<Token> _tokenEnumerator = null!;
// Diagnostics gathered while parsing; cleared at the start of every Parse call.
private readonly List<Diagnostic> _diagnostics = [];
// Token at the enumerator's current position, or null once the stream is exhausted.
private Token? _currentToken;
// Result of the most recent MoveNext: true while _currentToken holds a token.
private bool _hasCurrentToken;
/// <summary>
/// Exposes the parser's diagnostic list. This is the same list instance that
/// <see cref="Parse"/> clears and appends to, not a copy.
/// </summary>
/// <returns>The diagnostics recorded so far.</returns>
public IReadOnlyList<Diagnostic> GetDiagnostics() => _diagnostics;
/// <summary>
/// Parses a token stream into a <see cref="SyntaxTree"/> of top-level definitions.
/// </summary>
/// <remarks>
/// Each definition must start with one of the keywords 'extern', 'func', 'struct'
/// or 'interface'. When a definition fails to parse, its diagnostic is recorded and
/// tokens are skipped until the next definition keyword (or the end of the input),
/// so a single error does not abort the whole parse.
/// </remarks>
/// <param name="tokens">The tokens to parse.</param>
/// <returns>A syntax tree containing every definition that parsed successfully.</returns>
/// <exception cref="ArgumentNullException"><paramref name="tokens"/> is null.</exception>
public SyntaxTree Parse(IEnumerable<Token> tokens)
{
    ArgumentNullException.ThrowIfNull(tokens);

    _diagnostics.Clear();
    _tokenEnumerator = tokens.GetEnumerator();
    try
    {
        _hasCurrentToken = _tokenEnumerator.MoveNext();
        _currentToken = _hasCurrentToken ? _tokenEnumerator.Current : null;
        var definitions = new List<DefinitionSyntax>();
        while (_hasCurrentToken)
        {
            try
            {
                var keyword = ExpectSymbol();
                var definition = keyword.Symbol switch
                {
                    Symbol.Extern => ParseExtern(),
                    Symbol.Func => ParseFunc(),
                    Symbol.Struct => ParseStruct(),
                    Symbol.Interface => ParseInterface(),
                    _ => throw new ParseException(Diagnostic
                        .Error($"Expected 'extern', 'func', 'struct' or 'interface' but found '{keyword.Symbol}'")
                        .WithHelp("Valid definition keywords are 'extern', 'func', 'struct' and 'interface'")
                        .Build())
                };
                definitions.Add(definition);
            }
            catch (ParseException ex)
            {
                _diagnostics.Add(ex.Diagnostic);
                // Error recovery: resynchronize on the next definition keyword.
                while (_hasCurrentToken)
                {
                    if (_currentToken is SymbolToken { Symbol: Symbol.Extern or Symbol.Func or Symbol.Struct or Symbol.Interface })
                    {
                        break;
                    }
                    Next();
                }
            }
        }
        return new SyntaxTree(definitions);
    }
    finally
    {
        // The enumerator is driven manually (no foreach), so it must be disposed
        // explicitly to let lazily-evaluated token sources run their cleanup.
        _tokenEnumerator.Dispose();
    }
}
/// <summary>
/// Parses a parenthesized parameter list followed by an optional ': type' return type.
/// </summary>
/// <param name="thisArg">
/// Optional implicit first parameter; when supplied it is placed before the parsed parameters.
/// </param>
/// <returns>The signature; the return type is void when no ':' follows the parameter list.</returns>
private FuncSignatureSyntax ParseFuncSignature(FuncParameterSyntax? thisArg = null)
{
    var parameters = new List<FuncParameterSyntax>();
    if (thisArg != null)
    {
        parameters.Add(thisArg);
    }

    ExpectSymbol(Symbol.OpenParen);
    while (true)
    {
        if (TryExpectSymbol(Symbol.CloseParen))
        {
            break;
        }

        parameters.Add(ParseFuncParameter());

        // A parameter must be followed by ',' or the closing ')'; anything else
        // is tolerated but reported as a warning.
        if (TryExpectSymbol(Symbol.Comma))
        {
            continue;
        }
        if (_currentToken is SymbolToken { Symbol: Symbol.CloseParen })
        {
            continue;
        }

        _diagnostics.Add(Diagnostic
            .Warning("Missing comma between function parameters")
            .WithHelp("Add a ',' to separate parameters")
            .Build());
    }

    TypeSyntax returnType;
    if (TryExpectSymbol(Symbol.Colon))
    {
        returnType = ParseType();
    }
    else
    {
        returnType = new VoidTypeSyntax();
    }

    return new FuncSignatureSyntax(parameters, returnType);
}
/// <summary>Parses a single 'name: type' function parameter.</summary>
private FuncParameterSyntax ParseFuncParameter()
{
    var parameterName = ExpectIdentifier().Value;
    ExpectSymbol(Symbol.Colon);
    return new FuncParameterSyntax(parameterName, ParseType());
}
/// <summary>
/// Parses the definition that follows the 'extern' keyword. Only 'func' is accepted.
/// </summary>
/// <exception cref="ParseException">The next symbol is not 'func'.</exception>
private DefinitionSyntax ParseExtern()
{
    var keyword = ExpectSymbol();
    if (keyword.Symbol == Symbol.Func)
    {
        return ParseExternFunc();
    }

    throw new ParseException(Diagnostic.Error($"Unexpected symbol {keyword.Symbol} after extern declaration").Build());
}
/// <summary>
/// Parses an extern function: a name, an optional 'calls' clause naming the
/// identifier to call, and a signature. Without a 'calls' clause the call name
/// is the function's own name.
/// </summary>
private ExternFuncSyntax ParseExternFunc()
{
    var declaredName = ExpectIdentifier().Value;
    var callName = TryExpectSymbol(Symbol.Calls)
        ? ExpectIdentifier().Value
        : declaredName;
    return new ExternFuncSyntax(declaredName, callName, ParseFuncSignature());
}
/// <summary>Parses a function definition: name, signature, then a block body.</summary>
private LocalFuncSyntax ParseFunc()
{
    var funcName = ExpectIdentifier().Value;
    var funcSignature = ParseFuncSignature();
    var funcBody = ParseBlock();
    return new LocalFuncSyntax(funcName, funcSignature, funcBody);
}
/// <summary>
/// Parses a struct definition: a name followed by a brace-delimited body of
/// fields ('name: type' with an optional '= value') and member functions
/// (introduced by 'func').
/// </summary>
/// <remarks>
/// Member functions receive an implicit first parameter named "this" whose type is
/// the struct itself. Fields are numbered in declaration order, starting at zero.
/// </remarks>
private DefinitionSyntax ParseStruct()
{
    var structName = ExpectIdentifier().Value;
    ExpectSymbol(Symbol.OpenBrace);

    var fields = new List<StructFieldSyntax>();
    var funcs = new List<StructFuncSyntax>();

    while (true)
    {
        if (TryExpectSymbol(Symbol.CloseBrace))
        {
            break;
        }

        if (!TryExpectSymbol(Symbol.Func))
        {
            var fieldName = ExpectIdentifier().Value;
            ExpectSymbol(Symbol.Colon);
            var fieldType = ParseType();
            var fieldValue = Optional<ExpressionSyntax>.Empty();
            if (TryExpectSymbol(Symbol.Assign))
            {
                fieldValue = ParseExpression();
            }

            // The number of fields parsed so far is this field's index.
            fields.Add(new StructFieldSyntax(fields.Count, fieldName, fieldType, fieldValue));
            continue;
        }

        var funcName = ExpectIdentifier().Value;
        var thisArg = new FuncParameterSyntax("this", new CustomTypeSyntax(structName));
        var funcSignature = ParseFuncSignature(thisArg);
        var funcBody = ParseBlock();
        funcs.Add(new StructFuncSyntax(funcName, funcSignature, funcBody));
    }

    return new StructSyntax(structName, fields, funcs);
}
/// <summary>
/// Parses an interface definition: a name followed by a brace-delimited list of
/// 'func name(signature)' declarations without bodies.
/// </summary>
private InterfaceSyntax ParseInterface()
{
    var interfaceName = ExpectIdentifier().Value;
    ExpectSymbol(Symbol.OpenBrace);

    var functions = new List<InterfaceFuncSyntax>();
    while (true)
    {
        if (TryExpectSymbol(Symbol.CloseBrace))
        {
            break;
        }

        ExpectSymbol(Symbol.Func);
        var funcName = ExpectIdentifier().Value;
        functions.Add(new InterfaceFuncSyntax(funcName, ParseFuncSignature()));
    }

    return new InterfaceSyntax(interfaceName, functions);
}
/// <summary>
/// Parses one statement, dispatching on the current token without consuming it.
/// Statement keywords go to their dedicated parser; anything else is parsed as an
/// expression statement or assignment.
/// </summary>
private StatementSyntax ParseStatement()
{
    return _currentToken switch
    {
        SymbolToken { Symbol: Symbol.Return } => ParseReturn(),
        SymbolToken { Symbol: Symbol.If } => ParseIf(),
        SymbolToken { Symbol: Symbol.While } => ParseWhile(),
        SymbolToken { Symbol: Symbol.Let } => ParseVariableDeclaration(),
        SymbolToken { Symbol: Symbol.Break } => ParseBreak(),
        SymbolToken { Symbol: Symbol.Continue } => ParseContinue(),
        _ => ParseStatementExpression()
    };
}
/// <summary>
/// Parses an expression used as a statement. If the expression is followed by the
/// assignment symbol, the result is an assignment whose target is that expression.
/// </summary>
private StatementSyntax ParseStatementExpression()
{
    var target = ParseExpression();
    if (!TryExpectSymbol(Symbol.Assign))
    {
        return new StatementExpressionSyntax(target);
    }

    var assignedValue = ParseExpression();
    return new AssignmentSyntax(target, assignedValue);
}
/// <summary>
/// Parses a 'let' declaration: a name, an optional ': type' annotation and an
/// optional initializer introduced by the assignment symbol.
/// </summary>
private VariableDeclarationSyntax ParseVariableDeclaration()
{
    ExpectSymbol(Symbol.Let);
    var identifier = ExpectIdentifier();

    var declaredType = Optional<TypeSyntax>.Empty();
    if (TryExpectSymbol(Symbol.Colon))
    {
        declaredType = ParseType();
    }

    var initializer = Optional<ExpressionSyntax>.Empty();
    if (TryExpectSymbol(Symbol.Assign))
    {
        initializer = ParseExpression();
    }

    return new VariableDeclarationSyntax(identifier.Value, declaredType, initializer);
}
/// <summary>Consumes the 'break' keyword and produces a break statement.</summary>
private StatementSyntax ParseBreak()
{
    ExpectSymbol(Symbol.Break);
    var statement = new BreakSyntax();
    return statement;
}
/// <summary>Consumes the 'continue' keyword and produces a continue statement.</summary>
private StatementSyntax ParseContinue()
{
    ExpectSymbol(Symbol.Continue);
    var statement = new ContinueSyntax();
    return statement;
}
/// <summary>
/// Parses a 'return' statement. A semicolon directly after the keyword is consumed
/// and yields a return without a value; otherwise the following expression is the
/// returned value.
/// </summary>
private ReturnSyntax ParseReturn()
{
    ExpectSymbol(Symbol.Return);

    if (TryExpectSymbol(Symbol.Semi))
    {
        return new ReturnSyntax(Optional<ExpressionSyntax>.Empty());
    }

    Optional<ExpressionSyntax> returned = ParseExpression();
    return new ReturnSyntax(returned);
}
/// <summary>
/// Parses an 'if' statement: the keyword, a condition expression, a block body and
/// an optional 'else' branch that is either another 'if' statement or a block.
/// </summary>
/// <returns>The parsed if statement, with the else branch empty when absent.</returns>
private IfSyntax ParseIf()
{
    ExpectSymbol(Symbol.If);
    var condition = ParseExpression();
    var body = ParseBlock();
    var elseStatement = Optional<Variant<IfSyntax, BlockSyntax>>.Empty();
    if (TryExpectSymbol(Symbol.Else))
    {
        // Only peek at the 'if' keyword here: the recursive ParseIf call consumes it
        // itself, so consuming it first would make every 'else if' fail to parse.
        elseStatement = _currentToken is SymbolToken { Symbol: Symbol.If }
            ? (Variant<IfSyntax, BlockSyntax>)ParseIf()
            : (Variant<IfSyntax, BlockSyntax>)ParseBlock();
    }
    return new IfSyntax(condition, body, elseStatement);
}
/// <summary>Parses a 'while' loop: the keyword, a condition expression and a block body.</summary>
private WhileSyntax ParseWhile()
{
    ExpectSymbol(Symbol.While);
    var loopCondition = ParseExpression();
    var loopBody = ParseBlock();
    return new WhileSyntax(loopCondition, loopBody);
}
/// <summary>
/// Parses a binary expression by precedence climbing.
/// </summary>
/// <param name="precedence">
/// Minimum operator precedence this call may consume; operators that bind less
/// tightly are left for the caller.
/// </param>
/// <returns>The parsed expression; a lone primary expression when no operator follows.</returns>
private ExpressionSyntax ParseExpression(int precedence = 0)
{
    var result = ParsePrimaryExpression();

    while (true)
    {
        if (_currentToken is not SymbolToken candidate)
        {
            break;
        }
        if (!TryGetBinaryOperator(candidate.Symbol, out var op))
        {
            break;
        }

        var operatorPrecedence = GetBinaryOperatorPrecedence(op.Value);
        if (operatorPrecedence < precedence)
        {
            break;
        }

        Next();
        // The right operand only takes operators that bind tighter, which makes
        // operators of equal precedence group to the left.
        var rightOperand = ParseExpression(operatorPrecedence + 1);
        result = new BinaryExpressionSyntax(result, op.Value, rightOperand);
    }

    return result;
}
/// <summary>
/// Returns the binding strength of a binary operator; higher values bind tighter.
/// </summary>
/// <exception cref="ArgumentOutOfRangeException">The operator has no precedence assigned.</exception>
private int GetBinaryOperatorPrecedence(BinaryOperatorSyntax operatorSyntax) => operatorSyntax switch
{
    BinaryOperatorSyntax.Multiply or BinaryOperatorSyntax.Divide => 3,
    BinaryOperatorSyntax.Plus or BinaryOperatorSyntax.Minus => 2,
    BinaryOperatorSyntax.GreaterThan
        or BinaryOperatorSyntax.GreaterThanOrEqual
        or BinaryOperatorSyntax.LessThan
        or BinaryOperatorSyntax.LessThanOrEqual => 1,
    BinaryOperatorSyntax.Equal or BinaryOperatorSyntax.NotEqual => 0,
    _ => throw new ArgumentOutOfRangeException(nameof(operatorSyntax), operatorSyntax, null)
};
/// <summary>
/// Maps a symbol to the binary operator it denotes, if any.
/// </summary>
/// <param name="symbol">The symbol to translate.</param>
/// <param name="binaryExpressionOperator">
/// The matching operator, or null when the symbol is not a binary operator.
/// </param>
/// <returns>True when the symbol is a binary operator; otherwise false.</returns>
private bool TryGetBinaryOperator(Symbol symbol, [NotNullWhen(true)] out BinaryOperatorSyntax? binaryExpressionOperator)
{
    binaryExpressionOperator = symbol switch
    {
        Symbol.Equal => BinaryOperatorSyntax.Equal,
        Symbol.NotEqual => BinaryOperatorSyntax.NotEqual,
        Symbol.LessThan => BinaryOperatorSyntax.LessThan,
        Symbol.LessThanOrEqual => BinaryOperatorSyntax.LessThanOrEqual,
        Symbol.GreaterThan => BinaryOperatorSyntax.GreaterThan,
        Symbol.GreaterThanOrEqual => BinaryOperatorSyntax.GreaterThanOrEqual,
        Symbol.Plus => BinaryOperatorSyntax.Plus,
        Symbol.Minus => BinaryOperatorSyntax.Minus,
        Symbol.Star => BinaryOperatorSyntax.Multiply,
        Symbol.ForwardSlash => BinaryOperatorSyntax.Divide,
        _ => (BinaryOperatorSyntax?)null
    };

    return binaryExpressionOperator.HasValue;
}
/// <summary>
/// Consumes one token and parses the primary expression it starts: a literal, an
/// identifier, an arrow function, a parenthesized expression, a unary '-' or '!'
/// operand, an array initializer or a struct initializer. Any postfix operators
/// that follow are then applied to the result.
/// </summary>
/// <exception cref="ParseException">The token cannot start an expression.</exception>
private ExpressionSyntax ParsePrimaryExpression()
{
    var token = ExpectToken();

    ExpressionSyntax expr = token switch
    {
        LiteralToken literal => new LiteralSyntax(literal.Value, literal.Kind),
        IdentifierToken identifier => new IdentifierSyntax(identifier.Value),
        SymbolToken { Symbol: Symbol.Func } => ParseArrowFunction(),
        SymbolToken { Symbol: Symbol.OpenParen } => ParseParenthesizedExpression(),
        SymbolToken { Symbol: Symbol.Minus } => new UnaryExpressionSyntax(UnaryOperatorSyntax.Negate, ParsePrimaryExpression()),
        SymbolToken { Symbol: Symbol.Bang } => new UnaryExpressionSyntax(UnaryOperatorSyntax.Invert, ParsePrimaryExpression()),
        SymbolToken { Symbol: Symbol.OpenBracket } => ParseArrayInitializer(),
        SymbolToken { Symbol: Symbol.Alloc } => ParseStructInitializer(),
        SymbolToken unexpected => throw new ParseException(Diagnostic
            .Error($"Unexpected symbol '{unexpected.Symbol}' in expression")
            .WithHelp("Expected literal, identifier, or '(' to start expression")
            .Build()),
        _ => throw new ParseException(Diagnostic
            .Error($"Unexpected token '{token.GetType().Name}' in expression")
            .WithHelp("Expected literal, identifier, or parenthesized expression")
            .Build())
    };

    return ParsePostfixOperators(expr);
}
/// <summary>
/// Parses an arrow function after its leading 'func' keyword has been consumed:
/// a parenthesized list of parameter names, the arrow symbol, and a body.
/// </summary>
/// <remarks>
/// The body is either a block (when it starts with '{') or a single expression,
/// which is wrapped in a block containing one return statement. Parameters may be
/// separated by commas; a missing comma is reported as a warning, matching the
/// other parameter and argument lists in this parser.
/// </remarks>
/// <returns>The parsed arrow function.</returns>
private ExpressionSyntax ParseArrowFunction()
{
    List<ArrowFuncParameterSyntax> parameters = [];
    ExpectSymbol(Symbol.OpenParen);
    while (!TryExpectSymbol(Symbol.CloseParen))
    {
        var name = ExpectIdentifier();
        parameters.Add(new ArrowFuncParameterSyntax(name.Value));
        if (!TryExpectSymbol(Symbol.Comma) && _currentToken is not SymbolToken { Symbol: Symbol.CloseParen })
        {
            _diagnostics.Add(Diagnostic
                .Warning("Missing comma between arrow function parameters")
                .WithHelp("Add a ',' to separate parameters")
                .Build());
        }
    }
    ExpectSymbol(Symbol.Arrow);
    BlockSyntax body;
    if (_currentToken is SymbolToken { Symbol: Symbol.OpenBrace })
    {
        // '{' starts a block body. ParseBlock consumes the brace itself, so it is
        // only peeked at here.
        body = ParseBlock();
    }
    else
    {
        // Expression body: equivalent to a block that returns the expression.
        var returnValue = ParseExpression();
        var arrowExpression = new ReturnSyntax(returnValue);
        body = new BlockSyntax([arrowExpression]);
    }
    return new ArrowFuncSyntax(parameters, body);
}
/// <summary>
/// Parses the inside of a parenthesized expression (the '(' has already been
/// consumed by the caller) and requires the closing ')'.
/// </summary>
private ExpressionSyntax ParseParenthesizedExpression()
{
    var inner = ParseExpression();
    ExpectSymbol(Symbol.CloseParen);
    return inner;
}
/// <summary>
/// Parses an array initializer after its opening '[' has been consumed: a capacity
/// expression, the closing ']', then the element type.
/// </summary>
private ExpressionSyntax ParseArrayInitializer()
{
    var capacityExpression = ParseExpression();
    ExpectSymbol(Symbol.CloseBracket);
    var elementType = ParseType();
    return new ArrayInitializerSyntax(capacityExpression, elementType);
}
/// <summary>
/// Parses a struct initializer after its leading 'alloc' keyword has been consumed:
/// a type followed by a brace-delimited list of 'field = value' initializers.
/// </summary>
/// <remarks>
/// A field that is initialized more than once is reported as an error diagnostic;
/// the first initializer is kept and parsing continues.
/// </remarks>
/// <returns>The parsed struct initializer.</returns>
private ExpressionSyntax ParseStructInitializer()
{
    var type = ParseType();
    Dictionary<string, ExpressionSyntax> initializers = [];
    ExpectSymbol(Symbol.OpenBrace);
    while (!TryExpectSymbol(Symbol.CloseBrace))
    {
        var name = ExpectIdentifier().Value;
        ExpectSymbol(Symbol.Assign);
        var value = ParseExpression();
        // Dictionary.Add would throw an unhandled ArgumentException on a repeated
        // field name; surface the problem as a diagnostic instead.
        if (!initializers.TryAdd(name, value))
        {
            _diagnostics.Add(Diagnostic
                .Error($"Field '{name}' is initialized more than once")
                .WithHelp("Remove the duplicate initializer")
                .Build());
        }
    }
    return new StructInitializerSyntax(type, initializers);
}
/// <summary>
/// Applies any run of postfix operators to an already-parsed expression:
/// address-of ('&amp;'), dereference ('^'), member access ('.name'), array
/// indexing ('[index]') and function calls ('(args)').
/// </summary>
/// <param name="expr">The expression the postfix operators apply to.</param>
/// <returns>The expression wrapped in every postfix operation found, innermost first.</returns>
private ExpressionSyntax ParsePostfixOperators(ExpressionSyntax expr)
{
    ExpressionSyntax result = expr;

    while (_hasCurrentToken)
    {
        if (TryExpectSymbol(Symbol.Ampersand))
        {
            result = new AddressOfSyntax(result);
        }
        else if (TryExpectSymbol(Symbol.Caret))
        {
            result = new DereferenceSyntax(result);
        }
        else if (TryExpectSymbol(Symbol.Period))
        {
            var memberName = ExpectIdentifier().Value;
            result = new MemberAccessSyntax(result, memberName);
        }
        else if (TryExpectSymbol(Symbol.OpenBracket))
        {
            var indexExpression = ParseExpression();
            ExpectSymbol(Symbol.CloseBracket);
            result = new ArrayIndexAccessSyntax(result, indexExpression);
        }
        else if (TryExpectSymbol(Symbol.OpenParen))
        {
            List<ExpressionSyntax> arguments = [];
            while (true)
            {
                if (TryExpectSymbol(Symbol.CloseParen))
                {
                    break;
                }

                arguments.Add(ParseExpression());

                // An argument must be followed by ',' or ')'; anything else is
                // tolerated but reported as a warning.
                if (TryExpectSymbol(Symbol.Comma))
                {
                    continue;
                }
                if (_currentToken is SymbolToken { Symbol: Symbol.CloseParen })
                {
                    continue;
                }

                _diagnostics.Add(Diagnostic
                    .Warning("Missing comma between function arguments")
                    .WithHelp("Add a ',' to separate arguments")
                    .Build());
            }

            result = new FuncCallSyntax(result, arguments);
        }
        else
        {
            // The current token is not a postfix operator.
            break;
        }
    }

    return result;
}
/// <summary>
/// Parses a brace-delimited block of statements.
/// </summary>
/// <remarks>
/// A statement that fails to parse is recorded as a diagnostic and one further
/// token is skipped before parsing resumes, so one bad statement does not discard
/// the rest of the block. Once the input is exhausted recovery is impossible, so
/// the error is propagated to the caller instead of being retried.
/// </remarks>
/// <returns>The statements that parsed successfully, in source order.</returns>
/// <exception cref="ParseException">
/// The opening '{' is missing, or the input ends before the closing '}'.
/// </exception>
private BlockSyntax ParseBlock()
{
    ExpectSymbol(Symbol.OpenBrace);
    List<StatementSyntax> statements = [];
    while (!TryExpectSymbol(Symbol.CloseBrace))
    {
        if (!_hasCurrentToken)
        {
            // Without this check an unterminated block would loop forever: every
            // iteration would fail at end of input and add another diagnostic.
            throw new ParseException(Diagnostic
                .Error("Unexpected end of file")
                .WithHelp("Expected '}' to close the block")
                .Build());
        }
        try
        {
            statements.Add(ParseStatement());
        }
        // Recover only while tokens remain. At end of input the exception must
        // propagate so enclosing blocks do not record the same failure again.
        catch (ParseException ex) when (_hasCurrentToken)
        {
            _diagnostics.Add(ex.Diagnostic);
            Next();
        }
    }
    return new BlockSyntax(statements);
}
/// <summary>
/// Parses a type: a named type (built-in or custom), a pointer ('^' followed by a
/// type), a function type ('func(types)' with an optional ': type' return type) or
/// an array type ('[]' followed by a type).
/// </summary>
/// <exception cref="ParseException">The current token cannot start a type.</exception>
private TypeSyntax ParseType()
{
    switch (_currentToken)
    {
        case IdentifierToken typeName:
            Next();
            return typeName.Value switch
            {
                "void" => new VoidTypeSyntax(),
                "string" => new StringTypeSyntax(),
                "cstring" => new CStringTypeSyntax(),
                "i64" => new PrimitiveTypeSyntax(PrimitiveTypeSyntaxKind.I64),
                "i32" => new PrimitiveTypeSyntax(PrimitiveTypeSyntaxKind.I32),
                "i16" => new PrimitiveTypeSyntax(PrimitiveTypeSyntaxKind.I16),
                "i8" => new PrimitiveTypeSyntax(PrimitiveTypeSyntaxKind.I8),
                "u64" => new PrimitiveTypeSyntax(PrimitiveTypeSyntaxKind.U64),
                "u32" => new PrimitiveTypeSyntax(PrimitiveTypeSyntaxKind.U32),
                "u16" => new PrimitiveTypeSyntax(PrimitiveTypeSyntaxKind.U16),
                "u8" => new PrimitiveTypeSyntax(PrimitiveTypeSyntaxKind.U8),
                "f64" => new PrimitiveTypeSyntax(PrimitiveTypeSyntaxKind.F64),
                "f32" => new PrimitiveTypeSyntax(PrimitiveTypeSyntaxKind.F32),
                "bool" => new PrimitiveTypeSyntax(PrimitiveTypeSyntaxKind.Bool),
                // Any other identifier names a user-defined type.
                _ => new CustomTypeSyntax(typeName.Value)
            };

        case SymbolToken { Symbol: Symbol.Caret }:
        {
            Next();
            var pointee = ParseType();
            return new PointerTypeSyntax(pointee);
        }

        case SymbolToken { Symbol: Symbol.Func }:
        {
            Next();
            ExpectSymbol(Symbol.OpenParen);
            var parameterTypes = new List<TypeSyntax>();
            while (true)
            {
                if (TryExpectSymbol(Symbol.CloseParen))
                {
                    break;
                }

                parameterTypes.Add(ParseType());

                // A parameter type must be followed by ',' or ')'; anything else
                // is tolerated but reported as a warning.
                if (TryExpectSymbol(Symbol.Comma))
                {
                    continue;
                }
                if (_currentToken is SymbolToken { Symbol: Symbol.CloseParen })
                {
                    continue;
                }

                _diagnostics.Add(Diagnostic
                    .Warning("Missing comma between func type arguments")
                    .WithHelp("Add a ',' to separate arguments")
                    .Build());
            }

            TypeSyntax returnType;
            if (TryExpectSymbol(Symbol.Colon))
            {
                returnType = ParseType();
            }
            else
            {
                returnType = new VoidTypeSyntax();
            }

            return new FuncTypeSyntax(parameterTypes, returnType);
        }

        case SymbolToken { Symbol: Symbol.OpenBracket }:
        {
            Next();
            ExpectSymbol(Symbol.CloseBracket);
            var elementType = ParseType();
            return new ArrayTypeSyntax(elementType);
        }

        default:
            throw new ParseException(Diagnostic
                .Error("Invalid type syntax")
                .WithHelp("Expected type name, '^' for pointer, or '[]' for array")
                .Build());
    }
}
/// <summary>Returns the current token and advances past it.</summary>
/// <exception cref="ParseException">The token stream is exhausted.</exception>
private Token ExpectToken()
{
    if (_hasCurrentToken)
    {
        var consumed = _currentToken!;
        Next();
        return consumed;
    }

    throw new ParseException(Diagnostic
        .Error("Unexpected end of file")
        .WithHelp("Expected more tokens to complete the syntax")
        .Build());
}
/// <summary>Consumes the next token and requires it to be a symbol.</summary>
/// <returns>The consumed symbol token.</returns>
/// <exception cref="ParseException">
/// The stream is exhausted or the token is not a symbol. The token has already
/// been consumed when the exception is thrown.
/// </exception>
private SymbolToken ExpectSymbol()
{
    var token = ExpectToken();
    if (token is SymbolToken symbol)
    {
        return symbol;
    }

    throw new ParseException(Diagnostic
        .Error($"Expected symbol, but found {token.GetType().Name}")
        .WithHelp("This position requires a symbol like '(', ')', '{', '}', etc.")
        .Build());
}
/// <summary>Consumes the next token and requires it to be the given symbol.</summary>
/// <param name="expectedSymbol">The symbol that must appear next.</param>
/// <exception cref="ParseException">
/// The next token is missing, is not a symbol, or is a different symbol. The token
/// has already been consumed when the exception is thrown.
/// </exception>
private void ExpectSymbol(Symbol expectedSymbol)
{
    var actual = ExpectSymbol().Symbol;
    if (actual == expectedSymbol)
    {
        return;
    }

    throw new ParseException(Diagnostic
        .Error($"Expected '{expectedSymbol}', but found '{actual}'")
        .WithHelp($"Insert '{expectedSymbol}' here")
        .Build());
}
/// <summary>
/// Consumes the current token if, and only if, it is the given symbol.
/// </summary>
/// <param name="symbol">The symbol to look for.</param>
/// <returns>True when the symbol was present and consumed; otherwise false, with no token consumed.</returns>
private bool TryExpectSymbol(Symbol symbol)
{
    if (_currentToken is not SymbolToken { Symbol: var current } || current != symbol)
    {
        return false;
    }

    Next();
    return true;
}
/// <summary>
/// Consumes the current token if, and only if, it is an identifier.
/// </summary>
/// <param name="identifier">The consumed identifier token, or null when the current token is not an identifier.</param>
/// <returns>True when an identifier was consumed; otherwise false, with no token consumed.</returns>
private bool TryExpectIdentifier([NotNullWhen(true)] out IdentifierToken? identifier)
{
    identifier = _currentToken as IdentifierToken;
    if (identifier is null)
    {
        return false;
    }

    Next();
    return true;
}
/// <summary>Consumes the next token and requires it to be an identifier.</summary>
/// <returns>The consumed identifier token.</returns>
/// <exception cref="ParseException">
/// The stream is exhausted or the token is not an identifier. The token has already
/// been consumed when the exception is thrown.
/// </exception>
private IdentifierToken ExpectIdentifier()
{
    var token = ExpectToken();
    if (token is IdentifierToken identifier)
    {
        return identifier;
    }

    throw new ParseException(Diagnostic
        .Error($"Expected identifier, but found {token.GetType().Name}")
        .WithHelp("Provide a valid identifier name here")
        .Build());
}
/// <summary>
/// Advances to the next token, updating the current-token state. At the end of the
/// stream the current token becomes null and the has-token flag becomes false.
/// </summary>
private void Next()
{
    if (_tokenEnumerator.MoveNext())
    {
        _hasCurrentToken = true;
        _currentToken = _tokenEnumerator.Current;
    }
    else
    {
        _hasCurrentToken = false;
        _currentToken = null;
    }
}
}
/// <summary>
/// Exception that carries a <see cref="Diagnostics.Diagnostic"/>. The exception
/// message is the diagnostic's message.
/// </summary>
/// <param name="diagnostic">The diagnostic carried by this exception.</param>
public class ParseException(Diagnostic diagnostic) : Exception(diagnostic.Message)
{
    /// <summary>The diagnostic supplied when the exception was created.</summary>
    public Diagnostic Diagnostic { get; } = diagnostic;
}