Files
nub-lang/src/compiler/Nub.Lang/Frontend/Parsing/Parser.cs
nub31 15a4bd7ea2 ...
2025-05-25 02:13:26 +02:00

749 lines
25 KiB
C#

using System.Diagnostics.CodeAnalysis;
using Nub.Lang.Frontend.Diagnostics;
using Nub.Lang.Frontend.Lexing;
namespace Nub.Lang.Frontend.Parsing;
public class Parser
{
private List<Token> _tokens = [];
private int _index;
private List<Diagnostic> _diagnostics = [];
public DiagnosticsResult<ModuleNode> ParseModule(List<Token> tokens, string rootFilePath)
{
_index = 0;
_tokens = tokens;
_diagnostics = [];
List<DefinitionNode> definitions = [];
List<string> imports = [];
while (Peek().HasValue)
{
try
{
if (TryExpectSymbol(Symbol.Import))
{
var name = ExpectIdentifier();
imports.Add(name.Value);
}
else
{
definitions.Add(ParseDefinition());
}
}
catch (ParseException ex)
{
_diagnostics.Add(ex.Diagnostic);
RecoverToNextDefinition();
}
}
return new DiagnosticsResult<ModuleNode>(_diagnostics, new ModuleNode(GetTokensForNode(0), rootFilePath, imports, definitions));
}
private DefinitionNode ParseDefinition()
{
var startIndex = _index;
List<ModifierToken> modifiers = [];
List<string> documentationParts = [];
while (_index < _tokens.Count && _tokens[_index] is DocumentationToken commentToken)
{
documentationParts.Add(commentToken.Documentation);
_index++;
}
var documentation = documentationParts.Count == 0 ? null : string.Join('\n', documentationParts);
while (TryExpectModifier(out var modifier))
{
modifiers.Add(modifier);
}
var keyword = ExpectSymbol();
return keyword.Symbol switch
{
Symbol.Func => ParseFuncDefinition(startIndex, modifiers, Optional.OfNullable(documentation)),
Symbol.Struct => ParseStruct(startIndex, modifiers, Optional.OfNullable(documentation)),
_ => throw new ParseException(Diagnostic
.Error($"Expected 'func' or 'struct', but found '{keyword.Symbol}'")
.WithHelp("Valid definition keywords are 'func' and 'struct'")
.At(keyword)
.Build())
};
}
private DefinitionNode ParseFuncDefinition(int startIndex, List<ModifierToken> modifiers, Optional<string> documentation)
{
var name = ExpectIdentifier();
List<FuncParameter> parameters = [];
ExpectSymbol(Symbol.OpenParen);
if (!TryExpectSymbol(Symbol.CloseParen))
{
while (!TryExpectSymbol(Symbol.CloseParen))
{
parameters.Add(ParseFuncParameter());
if (!TryExpectSymbol(Symbol.Comma) && Peek().TryGetValue(out var token) && token is not SymbolToken { Symbol: Symbol.CloseParen })
{
_diagnostics.Add(Diagnostic
.Warning("Missing comma between function parameters")
.WithHelp("Add a ',' to separate parameters")
.At(token)
.Build());
}
}
}
var returnType = Optional<NubType>.Empty();
if (TryExpectSymbol(Symbol.Colon))
{
returnType = ParseType();
}
var isExtern = modifiers.RemoveAll(x => x.Modifier == Modifier.Extern) > 0;
if (isExtern)
{
if (modifiers.Count != 0)
{
throw new ParseException(Diagnostic
.Error($"Invalid modifier for extern function: {modifiers[0].Modifier}")
.WithHelp($"Extern functions cannot use the '{modifiers[0].Modifier}' modifier")
.At(modifiers[0])
.Build());
}
return new ExternFuncDefinitionNode(GetTokensForNode(startIndex), documentation, name.Value, parameters, returnType);
}
var body = ParseBlock();
var isGlobal = modifiers.RemoveAll(x => x.Modifier == Modifier.Global) > 0;
if (modifiers.Count != 0)
{
throw new ParseException(Diagnostic
.Error($"Invalid modifiers for function: {modifiers[0].Modifier}")
.WithHelp($"Functions cannot use the '{modifiers[0].Modifier}' modifier")
.At(modifiers[0])
.Build());
}
return new LocalFuncDefinitionNode(GetTokensForNode(startIndex), documentation, name.Value, parameters, body, returnType, isGlobal);
}
private StructDefinitionNode ParseStruct(int startIndex, List<ModifierToken> _, Optional<string> documentation)
{
var name = ExpectIdentifier().Value;
ExpectSymbol(Symbol.OpenBrace);
List<StructField> variables = [];
while (!TryExpectSymbol(Symbol.CloseBrace))
{
var variableName = ExpectIdentifier().Value;
ExpectSymbol(Symbol.Colon);
var variableType = ParseType();
var variableValue = Optional<ExpressionNode>.Empty();
if (TryExpectSymbol(Symbol.Assign))
{
variableValue = ParseExpression();
}
variables.Add(new StructField(variableName, variableType, variableValue));
}
return new StructDefinitionNode(GetTokensForNode(startIndex), documentation, name, variables);
}
private FuncParameter ParseFuncParameter()
{
var variadic = false;
if (TryExpectSymbol(Symbol.Period))
{
ExpectSymbol(Symbol.Period);
ExpectSymbol(Symbol.Period);
variadic = true;
}
var name = ExpectIdentifier();
ExpectSymbol(Symbol.Colon);
var type = ParseType();
return new FuncParameter(name.Value, type, variadic);
}
private StatementNode ParseStatement()
{
var startIndex = _index;
var token = ExpectToken();
switch (token)
{
case IdentifierToken identifier:
{
var symbol = ExpectSymbol();
switch (symbol.Symbol)
{
case Symbol.OpenParen:
{
var parameters = new List<ExpressionNode>();
while (!TryExpectSymbol(Symbol.CloseParen))
{
parameters.Add(ParseExpression());
TryExpectSymbol(Symbol.Comma);
}
return new FuncCallStatementNode(GetTokensForNode(startIndex), new FuncCall(identifier.Value, parameters));
}
case Symbol.Assign:
{
var value = ParseExpression();
return new VariableAssignmentNode(GetTokensForNode(startIndex), identifier.Value, Optional<NubType>.Empty(), value);
}
case Symbol.Colon:
{
var type = ParseType();
ExpectSymbol(Symbol.Assign);
var value = ParseExpression();
return new VariableAssignmentNode(GetTokensForNode(startIndex), identifier.Value, type, value);
}
default:
{
throw new ParseException(Diagnostic
.Error($"Unexpected symbol '{symbol.Symbol}' after identifier")
.WithHelp("Expected '(', '=', or ':' after identifier")
.At(symbol)
.Build());
}
}
}
case SymbolToken symbol:
{
return symbol.Symbol switch
{
Symbol.Return => ParseReturn(startIndex),
Symbol.If => ParseIf(startIndex),
Symbol.While => ParseWhile(startIndex),
Symbol.Break => new BreakNode(GetTokensForNode(startIndex)),
Symbol.Continue => new ContinueNode(GetTokensForNode(startIndex)),
_ => throw new ParseException(Diagnostic
.Error($"Unexpected symbol '{symbol.Symbol}' at start of statement")
.WithHelp("Expected identifier, 'return', 'if', 'while', 'break', or 'continue'")
.At(symbol)
.Build())
};
}
default:
{
throw new ParseException(Diagnostic
.Error($"Unexpected token '{token.GetType().Name}' at start of statement")
.WithHelp("Statements must start with an identifier or keyword")
.At(token)
.Build());
}
}
}
private ReturnNode ParseReturn(int startIndex)
{
var value = Optional<ExpressionNode>.Empty();
if (!TryExpectSymbol(Symbol.Semicolon))
{
value = ParseExpression();
}
return new ReturnNode(GetTokensForNode(startIndex), value);
}
private IfNode ParseIf(int startIndex)
{
var condition = ParseExpression();
var body = ParseBlock();
var elseStatement = Optional<Variant<IfNode, BlockNode>>.Empty();
if (TryExpectSymbol(Symbol.Else))
{
var newStartIndex = _index;
elseStatement = TryExpectSymbol(Symbol.If)
? (Variant<IfNode, BlockNode>)ParseIf(newStartIndex)
: (Variant<IfNode, BlockNode>)ParseBlock();
}
return new IfNode(GetTokensForNode(startIndex), condition, body, elseStatement);
}
private WhileNode ParseWhile(int startIndex)
{
var condition = ParseExpression();
var body = ParseBlock();
return new WhileNode(GetTokensForNode(startIndex), condition, body);
}
private ExpressionNode ParseExpression(int precedence = 0)
{
var startIndex = _index;
var left = ParsePrimaryExpression();
while (true)
{
var token = Peek();
if (!token.HasValue || token.Value is not SymbolToken symbolToken || !TryGetBinaryOperator(symbolToken.Symbol, out var op) ||
GetBinaryOperatorPrecedence(op.Value) < precedence)
{
break;
}
Next();
var right = ParseExpression(GetBinaryOperatorPrecedence(op.Value) + 1);
left = new BinaryExpressionNode(GetTokensForNode(startIndex), left, op.Value, right);
}
return left;
}
private static int GetBinaryOperatorPrecedence(BinaryExpressionOperator binaryExpressionOperator)
{
return binaryExpressionOperator switch
{
BinaryExpressionOperator.Multiply => 3,
BinaryExpressionOperator.Divide => 3,
BinaryExpressionOperator.Plus => 2,
BinaryExpressionOperator.Minus => 2,
BinaryExpressionOperator.GreaterThan => 1,
BinaryExpressionOperator.GreaterThanOrEqual => 1,
BinaryExpressionOperator.LessThan => 1,
BinaryExpressionOperator.LessThanOrEqual => 1,
BinaryExpressionOperator.Equal => 0,
BinaryExpressionOperator.NotEqual => 0,
_ => throw new ArgumentOutOfRangeException(nameof(binaryExpressionOperator), binaryExpressionOperator, null)
};
}
private static bool TryGetBinaryOperator(Symbol symbol, [NotNullWhen(true)] out BinaryExpressionOperator? binaryExpressionOperator)
{
switch (symbol)
{
case Symbol.Equal:
binaryExpressionOperator = BinaryExpressionOperator.Equal;
return true;
case Symbol.NotEqual:
binaryExpressionOperator = BinaryExpressionOperator.NotEqual;
return true;
case Symbol.LessThan:
binaryExpressionOperator = BinaryExpressionOperator.LessThan;
return true;
case Symbol.LessThanOrEqual:
binaryExpressionOperator = BinaryExpressionOperator.LessThanOrEqual;
return true;
case Symbol.GreaterThan:
binaryExpressionOperator = BinaryExpressionOperator.GreaterThan;
return true;
case Symbol.GreaterThanOrEqual:
binaryExpressionOperator = BinaryExpressionOperator.GreaterThanOrEqual;
return true;
case Symbol.Plus:
binaryExpressionOperator = BinaryExpressionOperator.Plus;
return true;
case Symbol.Minus:
binaryExpressionOperator = BinaryExpressionOperator.Minus;
return true;
case Symbol.Star:
binaryExpressionOperator = BinaryExpressionOperator.Multiply;
return true;
case Symbol.ForwardSlash:
binaryExpressionOperator = BinaryExpressionOperator.Divide;
return true;
default:
binaryExpressionOperator = null;
return false;
}
}
private ExpressionNode ParsePrimaryExpression()
{
var startIndex = _index;
ExpressionNode expr;
var token = ExpectToken();
switch (token)
{
case LiteralToken literal:
{
expr = new LiteralNode(GetTokensForNode(startIndex), literal.Value, literal.Type);
break;
}
case IdentifierToken identifier:
{
var next = Peek();
switch (next.Value)
{
case SymbolToken { Symbol: Symbol.OpenParen }:
{
Next();
List<ExpressionNode> parameters = [];
while (!TryExpectSymbol(Symbol.CloseParen))
{
parameters.Add(ParseExpression());
if (!TryExpectSymbol(Symbol.Comma) && Peek().TryGetValue(out var nextToken) && nextToken is not SymbolToken { Symbol: Symbol.CloseParen })
{
_diagnostics.Add(Diagnostic
.Warning("Missing comma between function arguments")
.WithHelp("Add a ',' to separate arguments")
.At(nextToken)
.Build());
}
}
expr = new FuncCallExpressionNode(GetTokensForNode(startIndex), new FuncCall(identifier.Value, parameters));
break;
}
default:
{
expr = new IdentifierNode(GetTokensForNode(startIndex), identifier.Value);
break;
}
}
break;
}
case SymbolToken symbolToken:
{
switch (symbolToken.Symbol)
{
case Symbol.OpenParen:
{
var expression = ParseExpression();
ExpectSymbol(Symbol.CloseParen);
expr = expression;
break;
}
case Symbol.LessThan:
{
var type = ParseType();
ExpectSymbol(Symbol.GreaterThan);
ExpectSymbol(Symbol.OpenParen);
var expressionToCast = ParseExpression();
ExpectSymbol(Symbol.CloseParen);
expr = new CastNode(GetTokensForNode(startIndex), type, expressionToCast);
break;
}
case Symbol.New:
{
var type = ParseType();
Dictionary<string, ExpressionNode> initializers = [];
ExpectSymbol(Symbol.OpenBrace);
while (!TryExpectSymbol(Symbol.CloseBrace))
{
var name = ExpectIdentifier().Value;
ExpectSymbol(Symbol.Assign);
var value = ParseExpression();
initializers.Add(name, value);
}
expr = new StructInitializerNode(GetTokensForNode(startIndex), type, initializers);
break;
}
case Symbol.Ampersand:
{
var expression = ParsePrimaryExpression();
expr = new AddressOfNode(GetTokensForNode(startIndex), expression);
break;
}
case Symbol.Minus:
{
var expression = ParsePrimaryExpression();
expr = new UnaryExpressionNode(GetTokensForNode(startIndex), UnaryExpressionOperator.Negate, expression);
break;
}
case Symbol.Bang:
{
var expression = ParsePrimaryExpression();
expr = new UnaryExpressionNode(GetTokensForNode(startIndex), UnaryExpressionOperator.Invert, expression);
break;
}
default:
{
throw new ParseException(Diagnostic
.Error($"Unexpected symbol '{symbolToken.Symbol}' in expression")
.WithHelp("Expected literal, identifier, or '(' to start expression")
.At(symbolToken)
.Build());
}
}
break;
}
default:
{
throw new ParseException(Diagnostic
.Error($"Unexpected token '{token.GetType().Name}' in expression")
.WithHelp("Expected literal, identifier, or parenthesized expression")
.At(token)
.Build());
}
}
return ParsePostfixOperators(startIndex, expr);
}
private ExpressionNode ParsePostfixOperators(int startIndex, ExpressionNode expr)
{
while (true)
{
if (TryExpectSymbol(Symbol.Caret))
{
expr = new DereferenceNode(GetTokensForNode(startIndex), expr);
continue;
}
if (TryExpectSymbol(Symbol.Period))
{
var structMember = ExpectIdentifier().Value;
expr = new MemberAccessNode(GetTokensForNode(startIndex), expr, structMember);
continue;
}
if (TryExpectSymbol(Symbol.OpenBracket))
{
var index = ParseExpression();
ExpectSymbol(Symbol.CloseBracket);
expr = new ArrayIndexNode(GetTokensForNode(startIndex), expr, index);
continue;
}
break;
}
return expr;
}
private BlockNode ParseBlock()
{
var startIndex = _index;
ExpectSymbol(Symbol.OpenBrace);
List<StatementNode> statements = [];
while (!TryExpectSymbol(Symbol.CloseBrace))
{
try
{
statements.Add(ParseStatement());
}
catch (ParseException ex)
{
_diagnostics.Add(ex.Diagnostic);
RecoverToNextStatement();
}
}
return new BlockNode(GetTokensForNode(startIndex), statements);
}
private NubType ParseType()
{
if (TryExpectIdentifier(out var name))
{
return NubType.Parse(name);
}
if (TryExpectSymbol(Symbol.Caret))
{
var baseType = ParseType();
return new NubPointerType(baseType);
}
if (TryExpectSymbol(Symbol.OpenBracket))
{
ExpectSymbol(Symbol.CloseBracket);
var baseType = ParseType();
return new NubArrayType(baseType);
}
if (!Peek().TryGetValue(out var token))
{
throw new ParseException(Diagnostic
.Error("Unexpected end of file while parsing type")
.WithHelp("Expected a type name")
.At(_tokens.Last().SourceFile, SourceLocationCalculator.GetSpan(_tokens.Last()))
.Build());
}
throw new ParseException(Diagnostic
.Error("Invalid type syntax")
.WithHelp("Expected type name, '^' for pointer, or '[]' for array")
.At(token)
.Build());
}
private Token ExpectToken()
{
if (!Peek().TryGetValue(out var token))
{
throw new ParseException(Diagnostic
.Error("Unexpected end of file")
.WithHelp("Expected more tokens to complete the syntax")
.At(_tokens.Last().SourceFile, SourceLocationCalculator.GetSpan(_tokens.Last()))
.Build());
}
Next();
return token;
}
private SymbolToken ExpectSymbol()
{
var token = ExpectToken();
if (token is not SymbolToken symbol)
{
throw new ParseException(Diagnostic
.Error($"Expected symbol, but found {token.GetType().Name}")
.WithHelp("This position requires a symbol like '(', ')', '{', '}', etc.")
.At(token)
.Build());
}
return symbol;
}
private void ExpectSymbol(Symbol expectedSymbol)
{
var token = ExpectSymbol();
if (token.Symbol != expectedSymbol)
{
throw new ParseException(Diagnostic
.Error($"Expected '{expectedSymbol}', but found '{token.Symbol}'")
.WithHelp($"Insert '{expectedSymbol}' here")
.At(token)
.Build());
}
}
private bool TryExpectSymbol(Symbol symbol)
{
if (Peek() is { Value: SymbolToken symbolToken } && symbolToken.Symbol == symbol)
{
Next();
return true;
}
return false;
}
private bool TryExpectModifier([NotNullWhen(true)] out ModifierToken? modifier)
{
if (Peek() is { Value: ModifierToken modifierToken })
{
modifier = modifierToken;
Next();
return true;
}
modifier = null;
return false;
}
private bool TryExpectIdentifier([NotNullWhen(true)] out string? identifier)
{
if (Peek() is { Value: IdentifierToken identifierToken })
{
identifier = identifierToken.Value;
Next();
return true;
}
identifier = null;
return false;
}
private IdentifierToken ExpectIdentifier()
{
var token = ExpectToken();
if (token is not IdentifierToken identifier)
{
throw new ParseException(Diagnostic
.Error($"Expected identifier, but found {token.GetType().Name}")
.WithHelp("Provide a valid identifier name here")
.At(token)
.Build());
}
return identifier;
}
private void RecoverToNextDefinition()
{
while (Peek().HasValue)
{
var token = Peek().Value;
if (token is SymbolToken { Symbol: Symbol.Func or Symbol.Struct })
{
break;
}
Next();
}
}
private void RecoverToNextStatement()
{
while (Peek().TryGetValue(out var token))
{
if (token is SymbolToken { Symbol: Symbol.CloseBrace } or IdentifierToken or SymbolToken
{
Symbol: Symbol.Return or Symbol.If or Symbol.While or Symbol.Break or Symbol.Continue
})
{
break;
}
Next();
}
}
private Optional<Token> Peek()
{
var peekIndex = _index;
while (peekIndex < _tokens.Count && _tokens[peekIndex] is DocumentationToken)
{
peekIndex++;
}
if (peekIndex < _tokens.Count)
{
return _tokens[peekIndex];
}
return Optional<Token>.Empty();
}
private void Next()
{
while (_index < _tokens.Count && _tokens[_index] is DocumentationToken)
{
_index++;
}
_index++;
}
private IReadOnlyList<Token> GetTokensForNode(int startIndex)
{
return _tokens[startIndex..Math.Min(_index, _tokens.Count - 1)];
}
}
public class ParseException : Exception
{
public Diagnostic Diagnostic { get; }
public ParseException(Diagnostic diagnostic) : base(diagnostic.Message)
{
Diagnostic = diagnostic;
}
}