// Files
// nub-lang/src/compiler/NubLang/Syntax/Parsing/Parser.cs
// nub31 5672d181fe ...
// 2025-07-07 18:56:47 +02:00
//
// 832 lines
// 26 KiB
// C#

using System.Diagnostics.CodeAnalysis;
using NubLang.Common;
using NubLang.Diagnostics;
using NubLang.Syntax.Parsing.Node;
using NubLang.Syntax.Tokenization;
namespace NubLang.Syntax.Parsing;
public sealed class Parser
{
// Namespace attached to parsed definitions; set to "default" by the constructor and
// overwritten when the token stream starts with a namespace declaration.
private string _namespace;
// The token stream being parsed.
private readonly IReadOnlyList<Token> _tokens;
// Diagnostics collected while parsing; this list is handed to the resulting SyntaxTree.
private readonly List<Diagnostic> _diagnostics = [];
// Index into _tokens of the next token to be consumed.
private int _tokenIndex;
/// <summary>
/// Creates a parser over <paramref name="tokens"/>. The namespace is "default"
/// until <see cref="Parse"/> encounters a namespace declaration.
/// </summary>
/// <param name="tokens">The token stream to parse.</param>
public Parser(IReadOnlyList<Token> tokens)
{
    _tokens = tokens;
    _namespace = "default";
}
/// <summary>
/// Parses the whole token stream into a <see cref="SyntaxTree"/>.
/// </summary>
/// <remarks>
/// Resets the diagnostics list and the token cursor, reads an optional leading
/// namespace declaration, then parses definitions until the tokens run out.
/// Syntax errors are recorded as diagnostics and parsing resumes at the next
/// definition keyword.
/// </remarks>
/// <returns>The syntax tree holding the namespace, the parsed definitions and the collected diagnostics.</returns>
public SyntaxTree Parse()
{
    _diagnostics.Clear();
    _tokenIndex = 0;

    if (TryExpectSymbol(Symbol.Namespace))
    {
        try
        {
            _namespace = ExpectIdentifier().Value;
        }
        catch (ParseException ex)
        {
            // A malformed namespace declaration is reported like every other syntax
            // error instead of escaping Parse() as an unhandled exception.
            _diagnostics.Add(ex.Diagnostic);
            RecoverToNextDefinition();
        }
    }

    List<DefinitionSyntax> definitions = [];
    while (Peek().HasValue)
    {
        try
        {
            definitions.Add(ParseDefinition());
        }
        catch (ParseException ex)
        {
            _diagnostics.Add(ex.Diagnostic);
            RecoverToNextDefinition();
        }
    }

    return new SyntaxTree(_namespace, definitions, _diagnostics);
}
/// <summary>
/// Parses one top-level definition. Consumes the leading keyword symbol and
/// dispatches to the matching parser (extern, func, struct, trait or impl).
/// </summary>
/// <returns>The parsed definition node.</returns>
/// <exception cref="ParseException">The leading symbol does not start a definition.</exception>
private DefinitionSyntax ParseDefinition()
{
    var startIndex = _tokenIndex;
    var keyword = ExpectSymbol();
    var node = keyword.Symbol switch
    {
        Symbol.Extern => ParseExtern(startIndex),
        Symbol.Func => ParseFunc(startIndex),
        Symbol.Struct => ParseStruct(startIndex),
        Symbol.Trait => ParseTrait(startIndex),
        Symbol.Impl => ParseImpl(startIndex),
        // The message lists every keyword handled above so it stays in sync with the dispatch.
        _ => throw new ParseException(Diagnostic
            .Error($"Expected 'extern', 'func', 'struct', 'trait' or 'impl', but found '{keyword.Symbol}'")
            .WithHelp("Valid definition keywords are 'extern', 'func', 'struct', 'trait' and 'impl'")
            .At(keyword)
            .Build())
    };
    return node;
}
/// <summary>
/// Parses a parenthesised parameter list followed by an optional ': type' return type.
/// </summary>
/// <param name="thisArg">Optional parameter placed before the parsed parameters.</param>
/// <returns>The signature; the return type is void when no ':' follows the parameter list.</returns>
private FuncSignatureSyntax ParseFuncSignature(FuncParameterSyntax? thisArg = null)
{
    var signatureStart = _tokenIndex;
    var parameters = new List<FuncParameterSyntax>();
    if (thisArg != null)
    {
        parameters.Add(thisArg);
    }

    ExpectSymbol(Symbol.OpenParen);
    while (true)
    {
        if (TryExpectSymbol(Symbol.CloseParen))
        {
            break;
        }

        parameters.Add(ParseFuncParameter());

        if (TryExpectSymbol(Symbol.Comma))
        {
            continue;
        }

        // No comma: only warn when the next token is not the closing parenthesis.
        if (Peek().TryGetValue(out var following) && following is not SymbolToken { Symbol: Symbol.CloseParen })
        {
            _diagnostics.Add(Diagnostic
                .Warning("Missing comma between function parameters")
                .WithHelp("Add a ',' to separate parameters")
                .At(following)
                .Build());
        }
    }

    NubType returnType;
    if (TryExpectSymbol(Symbol.Colon))
    {
        returnType = ParseType();
    }
    else
    {
        returnType = new NubVoidType();
    }

    return new FuncSignatureSyntax(GetTokens(signatureStart), parameters, returnType);
}
/// <summary>
/// Parses a single 'name: type' function parameter.
/// </summary>
private FuncParameterSyntax ParseFuncParameter()
{
    var parameterStart = _tokenIndex;
    var identifier = ExpectIdentifier();
    ExpectSymbol(Symbol.Colon);
    var parameterType = ParseType();
    var tokens = GetTokens(parameterStart);
    return new FuncParameterSyntax(tokens, identifier.Value, parameterType);
}
/// <summary>
/// Parses the definition that follows an already-consumed extern keyword.
/// Only a func symbol is accepted after it.
/// </summary>
/// <param name="startIndex">Token index where the enclosing definition began.</param>
/// <exception cref="ParseException">The symbol after extern is not func.</exception>
private DefinitionSyntax ParseExtern(int startIndex)
{
    var keyword = ExpectSymbol();
    if (keyword.Symbol == Symbol.Func)
    {
        return ParseExternFunc(startIndex);
    }

    throw new ParseException(Diagnostic.Error($"Unexpected symbol {keyword.Symbol} after extern declaration").At(keyword).Build());
}
/// <summary>
/// Parses an extern function: a name, an optional 'calls' clause naming a
/// different call target, and a signature.
/// </summary>
/// <param name="startIndex">Token index where the enclosing definition began.</param>
private ExternFuncSyntax ParseExternFunc(int startIndex)
{
    var nameToken = ExpectIdentifier();

    // Without a 'calls' clause the call name is the declared name.
    var callName = TryExpectSymbol(Symbol.Calls)
        ? ExpectIdentifier().Value
        : nameToken.Value;

    var signature = ParseFuncSignature();
    return new ExternFuncSyntax(GetTokens(startIndex), _namespace, nameToken.Value, callName, signature);
}
/// <summary>
/// Parses a function definition (name, signature, block body) after its keyword has been consumed.
/// </summary>
/// <param name="startIndex">Token index where the definition began.</param>
private LocalFuncSyntax ParseFunc(int startIndex)
{
    var funcName = ExpectIdentifier().Value;
    var signature = ParseFuncSignature();
    var block = ParseBlock();
    var tokens = GetTokens(startIndex);
    return new LocalFuncSyntax(tokens, _namespace, funcName, signature, block);
}
/// <summary>
/// Parses a struct definition: a name followed by brace-enclosed fields of the
/// form 'name: type' with an optional '= expression' initializer.
/// </summary>
/// <param name="startIndex">Token index where the definition began.</param>
private StructSyntax ParseStruct(int startIndex)
{
    var structName = ExpectIdentifier().Value;
    ExpectSymbol(Symbol.OpenBrace);

    var fields = new List<StructFieldSyntax>();
    // fieldIndex records each field's position in declaration order.
    for (var fieldIndex = 0; !TryExpectSymbol(Symbol.CloseBrace); fieldIndex++)
    {
        var fieldStart = _tokenIndex;
        var fieldName = ExpectIdentifier().Value;
        ExpectSymbol(Symbol.Colon);
        var fieldType = ParseType();

        var initializer = Optional<ExpressionSyntax>.Empty();
        if (TryExpectSymbol(Symbol.Assign))
        {
            initializer = ParseExpression();
        }

        fields.Add(new StructFieldSyntax(GetTokens(fieldStart), fieldIndex, fieldName, fieldType, initializer));
    }

    return new StructSyntax(GetTokens(startIndex), _namespace, structName, fields);
}
/// <summary>
/// Parses a trait definition: a name followed by brace-enclosed function
/// declarations, each consisting of the func keyword, a name and a signature.
/// </summary>
/// <param name="startIndex">Token index where the definition began.</param>
private TraitSyntax ParseTrait(int startIndex)
{
    var traitName = ExpectIdentifier().Value;
    ExpectSymbol(Symbol.OpenBrace);

    var members = new List<TraitFuncSyntax>();
    while (true)
    {
        if (TryExpectSymbol(Symbol.CloseBrace))
        {
            break;
        }

        var memberStart = _tokenIndex;
        ExpectSymbol(Symbol.Func);
        var memberName = ExpectIdentifier().Value;
        var memberSignature = ParseFuncSignature();
        members.Add(new TraitFuncSyntax(GetTokens(memberStart), memberName, memberSignature));
    }

    return new TraitSyntax(GetTokens(startIndex), _namespace, traitName, members);
}
/// <summary>
/// Parses a trait implementation: a trait type, the 'for' symbol, the
/// implementing type, then brace-enclosed function implementations.
/// </summary>
/// <remarks>
/// Every implemented function receives a leading parameter named "this" of the
/// implementing type, created with an empty token list.
/// </remarks>
/// <param name="startIndex">Token index where the definition began.</param>
private TraitImplSyntax ParseImpl(int startIndex)
{
    var traitType = ParseType();
    ExpectSymbol(Symbol.For);
    var forType = ParseType();

    List<TraitFuncImplSyntax> functions = [];
    ExpectSymbol(Symbol.OpenBrace);
    while (!TryExpectSymbol(Symbol.CloseBrace))
    {
        var funcStartIndex = _tokenIndex;
        ExpectSymbol(Symbol.Func);
        var functionName = ExpectIdentifier().Value;
        var signature = ParseFuncSignature(new FuncParameterSyntax([], "this", forType));
        var body = ParseBlock();
        // One node per iteration: Add, not AddRange (which expects a sequence).
        functions.Add(new TraitFuncImplSyntax(GetTokens(funcStartIndex), functionName, signature, body));
    }

    return new TraitImplSyntax(GetTokens(startIndex), _namespace, traitType, forType, functions);
}
/// <summary>
/// Parses one statement. The leading token is only peeked: each keyword
/// statement parser consumes its own keyword, and anything else is parsed as
/// an expression statement or assignment.
/// </summary>
/// <exception cref="ParseException">There are no tokens left.</exception>
private StatementSyntax ParseStatement()
{
    var startIndex = _tokenIndex;
    if (!Peek().TryGetValue(out var token))
    {
        throw new ParseException(Diagnostic
            .Error("Unexpected end of file while parsing statement")
            .At(_tokens.Last())
            .Build());
    }

    return token switch
    {
        SymbolToken { Symbol: Symbol.Return } => ParseReturn(startIndex),
        SymbolToken { Symbol: Symbol.If } => ParseIf(startIndex),
        SymbolToken { Symbol: Symbol.While } => ParseWhile(startIndex),
        SymbolToken { Symbol: Symbol.Let } => ParseVariableDeclaration(startIndex),
        SymbolToken { Symbol: Symbol.Break } => ParseBreak(startIndex),
        SymbolToken { Symbol: Symbol.Continue } => ParseContinue(startIndex),
        _ => ParseStatementExpression(startIndex)
    };
}
/// <summary>
/// Parses an expression used as a statement. If an assignment symbol follows,
/// the expression becomes the target of an assignment.
/// </summary>
/// <param name="startIndex">Token index where the statement began.</param>
private StatementSyntax ParseStatementExpression(int startIndex)
{
    var target = ParseExpression();
    if (!TryExpectSymbol(Symbol.Assign))
    {
        return new StatementExpressionSyntax(GetTokens(startIndex), target);
    }

    var assigned = ParseExpression();
    return new AssignmentSyntax(GetTokens(startIndex), target, assigned);
}
/// <summary>
/// Parses a 'let' declaration: a name, an optional ': type' annotation and an
/// optional '= expression' initializer.
/// </summary>
/// <param name="startIndex">Token index where the statement began.</param>
private VariableDeclarationSyntax ParseVariableDeclaration(int startIndex)
{
    ExpectSymbol(Symbol.Let);
    var variableName = ExpectIdentifier().Value;

    var declaredType = Optional<NubType>.Empty();
    var initializer = Optional<ExpressionSyntax>.Empty();

    if (TryExpectSymbol(Symbol.Colon))
    {
        declaredType = ParseType();
    }

    if (TryExpectSymbol(Symbol.Assign))
    {
        initializer = ParseExpression();
    }

    var tokens = GetTokens(startIndex);
    return new VariableDeclarationSyntax(tokens, variableName, declaredType, initializer);
}
/// <summary>
/// Parses a 'break' statement, consuming only the break keyword.
/// </summary>
/// <param name="startIndex">Token index where the statement began.</param>
private StatementSyntax ParseBreak(int startIndex)
{
    // Only the keyword belongs to the statement; advancing further would
    // swallow the first token of whatever follows the break.
    ExpectSymbol(Symbol.Break);
    return new BreakSyntax(GetTokens(startIndex));
}
/// <summary>
/// Parses a 'continue' statement, consuming only the continue keyword.
/// </summary>
/// <param name="startIndex">Token index where the statement began.</param>
private StatementSyntax ParseContinue(int startIndex)
{
    ExpectSymbol(Symbol.Continue);
    var tokens = GetTokens(startIndex);
    return new ContinueSyntax(tokens);
}
/// <summary>
/// Parses a 'return' statement. A semicolon directly after the keyword is
/// consumed and yields a return without a value; otherwise an expression is parsed.
/// </summary>
/// <param name="startIndex">Token index where the statement began.</param>
private ReturnSyntax ParseReturn(int startIndex)
{
    ExpectSymbol(Symbol.Return);

    var returned = Optional<ExpressionSyntax>.Empty();
    var hasValue = !TryExpectSymbol(Symbol.Semi);
    if (hasValue)
    {
        returned = ParseExpression();
    }

    return new ReturnSyntax(GetTokens(startIndex), returned);
}
/// <summary>
/// Parses an 'if' statement: condition, block body and an optional 'else'
/// that is either another 'if' or a block.
/// </summary>
/// <param name="startIndex">Token index of the 'if' keyword.</param>
private IfSyntax ParseIf(int startIndex)
{
    ExpectSymbol(Symbol.If);
    var condition = ParseExpression();
    var body = ParseBlock();

    var elseStatement = Optional<Variant<IfSyntax, BlockSyntax>>.Empty();
    if (TryExpectSymbol(Symbol.Else))
    {
        var newStartIndex = _tokenIndex;

        // Only peek at the 'if': the recursive ParseIf call consumes the keyword
        // itself, so consuming it here would make every 'else if' fail.
        var isElseIf = Peek().TryGetValue(out var next) && next is SymbolToken { Symbol: Symbol.If };

        elseStatement = isElseIf
            ? (Variant<IfSyntax, BlockSyntax>)ParseIf(newStartIndex)
            : (Variant<IfSyntax, BlockSyntax>)ParseBlock();
    }

    return new IfSyntax(GetTokens(startIndex), condition, body, elseStatement);
}
/// <summary>
/// Parses a 'while' loop: the keyword, a condition expression and a block body.
/// </summary>
/// <param name="startIndex">Token index where the statement began.</param>
private WhileSyntax ParseWhile(int startIndex)
{
    ExpectSymbol(Symbol.While);
    var loopCondition = ParseExpression();
    var loopBody = ParseBlock();
    var tokens = GetTokens(startIndex);
    return new WhileSyntax(tokens, loopCondition, loopBody);
}
/// <summary>
/// Parses a binary expression. Operators whose precedence is below
/// <paramref name="precedence"/> are left for the caller.
/// </summary>
/// <param name="precedence">Minimum operator precedence this call may consume.</param>
private ExpressionSyntax ParseExpression(int precedence = 0)
{
    var startIndex = _tokenIndex;
    var lhs = ParsePrimaryExpression();

    while (Peek().TryGetValue(out var next)
           && next is SymbolToken candidate
           && TryGetBinaryOperator(candidate.Symbol, out var op))
    {
        var operatorPrecedence = GetBinaryOperatorPrecedence(op.Value);
        if (operatorPrecedence < precedence)
        {
            break;
        }

        Next();

        // The right operand only accepts strictly higher precedence, so operators
        // of equal precedence group to the left.
        var rhs = ParseExpression(operatorPrecedence + 1);
        lhs = new BinaryExpressionSyntax(GetTokens(startIndex), lhs, op.Value, rhs);
    }

    return lhs;
}
/// <summary>
/// Returns the precedence of a binary operator; a higher number binds tighter.
/// </summary>
/// <exception cref="ArgumentOutOfRangeException">The operator is not listed here.</exception>
private int GetBinaryOperatorPrecedence(BinaryOperator @operator)
{
    switch (@operator)
    {
        case BinaryOperator.Multiply:
        case BinaryOperator.Divide:
            return 3;
        case BinaryOperator.Plus:
        case BinaryOperator.Minus:
            return 2;
        case BinaryOperator.GreaterThan:
        case BinaryOperator.GreaterThanOrEqual:
        case BinaryOperator.LessThan:
        case BinaryOperator.LessThanOrEqual:
            return 1;
        case BinaryOperator.Equal:
        case BinaryOperator.NotEqual:
            return 0;
        default:
            throw new ArgumentOutOfRangeException(nameof(@operator), @operator, null);
    }
}
/// <summary>
/// Maps a symbol to the binary operator it denotes.
/// </summary>
/// <param name="symbol">The symbol to look up.</param>
/// <param name="binaryExpressionOperator">The matching operator, or null when the symbol is not a binary operator.</param>
/// <returns>true when the symbol is a binary operator; otherwise false.</returns>
private bool TryGetBinaryOperator(Symbol symbol, [NotNullWhen(true)] out BinaryOperator? binaryExpressionOperator)
{
    binaryExpressionOperator = symbol switch
    {
        Symbol.Equal => BinaryOperator.Equal,
        Symbol.NotEqual => BinaryOperator.NotEqual,
        Symbol.LessThan => BinaryOperator.LessThan,
        Symbol.LessThanOrEqual => BinaryOperator.LessThanOrEqual,
        Symbol.GreaterThan => BinaryOperator.GreaterThan,
        Symbol.GreaterThanOrEqual => BinaryOperator.GreaterThanOrEqual,
        Symbol.Plus => BinaryOperator.Plus,
        Symbol.Minus => BinaryOperator.Minus,
        Symbol.Star => BinaryOperator.Multiply,
        Symbol.ForwardSlash => BinaryOperator.Divide,
        _ => null
    };

    return binaryExpressionOperator is not null;
}
/// <summary>
/// Parses a primary expression and any postfix operators that follow it.
/// </summary>
/// <remarks>
/// Handles literals, identifiers (optionally written 'namespace::name'),
/// anonymous functions, parenthesised expressions, unary '-' and '!',
/// array initializers ('[capacity]type') and 'alloc' struct initializers.
/// </remarks>
/// <exception cref="ParseException">The next token cannot start an expression.</exception>
private ExpressionSyntax ParsePrimaryExpression()
{
    var startIndex = _tokenIndex;
    ExpressionSyntax expr;
    var token = ExpectToken();
    switch (token)
    {
        case LiteralToken literal:
        {
            expr = new LiteralSyntax(GetTokens(startIndex), literal.Value, literal.Kind);
            break;
        }
        case IdentifierToken identifier:
        {
            var @namespace = Optional<string>.Empty();
            var name = identifier.Value;
            // 'a::b' means identifier b in namespace a.
            if (TryExpectSymbol(Symbol.DoubleColon))
            {
                @namespace = identifier.Value;
                name = ExpectIdentifier().Value;
            }

            expr = new IdentifierSyntax(GetTokens(startIndex), @namespace, name);
            break;
        }
        case SymbolToken symbolToken:
        {
            switch (symbolToken.Symbol)
            {
                case Symbol.Func:
                {
                    // Anonymous function: parameter names only, then a block body.
                    List<AnonymousFuncParameterSyntax> parameters = [];
                    ExpectSymbol(Symbol.OpenParen);
                    while (!TryExpectSymbol(Symbol.CloseParen))
                    {
                        var parameterStartIndex = _tokenIndex;
                        var name = ExpectIdentifier();
                        parameters.Add(new AnonymousFuncParameterSyntax(GetTokens(parameterStartIndex), name.Value));
                    }

                    var body = ParseBlock();
                    expr = new AnonymousFuncSyntax(GetTokens(startIndex), parameters, body);
                    break;
                }
                case Symbol.OpenParen:
                {
                    var expression = ParseExpression();
                    ExpectSymbol(Symbol.CloseParen);
                    expr = expression;
                    break;
                }
                case Symbol.Minus:
                {
                    var expression = ParsePrimaryExpression();
                    expr = new UnaryExpressionSyntax(GetTokens(startIndex), UnaryOperator.Negate, expression);
                    break;
                }
                case Symbol.Bang:
                {
                    var expression = ParsePrimaryExpression();
                    expr = new UnaryExpressionSyntax(GetTokens(startIndex), UnaryOperator.Invert, expression);
                    break;
                }
                case Symbol.OpenBracket:
                {
                    var capacity = ParseExpression();
                    ExpectSymbol(Symbol.CloseBracket);
                    var type = ParseType();
                    expr = new ArrayInitializerSyntax(GetTokens(startIndex), capacity, type);
                    break;
                }
                case Symbol.Alloc:
                {
                    var type = ParseType();
                    Dictionary<string, ExpressionSyntax> initializers = [];
                    ExpectSymbol(Symbol.OpenBrace);
                    while (!TryExpectSymbol(Symbol.CloseBrace))
                    {
                        var fieldToken = ExpectIdentifier();
                        ExpectSymbol(Symbol.Assign);
                        var value = ParseExpression();

                        // Dictionary.Add throws ArgumentException on a repeated key, which
                        // would crash the parser. Report the duplicate and keep the first value.
                        if (!initializers.TryAdd(fieldToken.Value, value))
                        {
                            _diagnostics.Add(Diagnostic
                                .Error($"Field '{fieldToken.Value}' is initialized more than once")
                                .WithHelp("Remove the duplicate initializer")
                                .At(fieldToken)
                                .Build());
                        }
                    }

                    expr = new StructInitializerSyntax(GetTokens(startIndex), type, initializers);
                    break;
                }
                default:
                {
                    throw new ParseException(Diagnostic
                        .Error($"Unexpected symbol '{symbolToken.Symbol}' in expression")
                        .WithHelp("Expected literal, identifier, or '(' to start expression")
                        .At(symbolToken)
                        .Build());
                }
            }

            break;
        }
        default:
        {
            throw new ParseException(Diagnostic
                .Error($"Unexpected token '{token.GetType().Name}' in expression")
                .WithHelp("Expected literal, identifier, or parenthesized expression")
                .At(token)
                .Build());
        }
    }

    return ParsePostfixOperators(startIndex, expr);
}
/// <summary>
/// Wraps <paramref name="expr"/> in the postfix operators that follow it:
/// '&amp;' (address-of), '^' (dereference), '.member', '[index]' and '(arguments)'.
/// </summary>
/// <param name="startIndex">Token index where the whole expression began.</param>
/// <param name="expr">The expression the postfix operators apply to.</param>
private ExpressionSyntax ParsePostfixOperators(int startIndex, ExpressionSyntax expr)
{
    var finished = false;
    while (!finished)
    {
        if (TryExpectSymbol(Symbol.Ampersand))
        {
            // Address-of ends the postfix chain, unlike the other operators.
            // NOTE(review): confirm this asymmetry is intended.
            expr = new AddressOfSyntax(GetTokens(startIndex), expr);
            finished = true;
        }
        else if (TryExpectSymbol(Symbol.Caret))
        {
            expr = new DereferenceSyntax(GetTokens(startIndex), expr);
        }
        else if (TryExpectSymbol(Symbol.Period))
        {
            var memberName = ExpectIdentifier().Value;
            expr = new MemberAccessSyntax(GetTokens(startIndex), expr, memberName);
        }
        else if (TryExpectSymbol(Symbol.OpenBracket))
        {
            var indexExpression = ParseExpression();
            ExpectSymbol(Symbol.CloseBracket);
            expr = new ArrayIndexAccessSyntax(GetTokens(startIndex), expr, indexExpression);
        }
        else if (TryExpectSymbol(Symbol.OpenParen))
        {
            var arguments = ParseCallArguments();
            expr = new FuncCallSyntax(GetTokens(startIndex), expr, arguments);
        }
        else
        {
            finished = true;
        }
    }

    return expr;

    // Parses call arguments up to and including the closing parenthesis.
    List<ExpressionSyntax> ParseCallArguments()
    {
        var arguments = new List<ExpressionSyntax>();
        while (!TryExpectSymbol(Symbol.CloseParen))
        {
            arguments.Add(ParseExpression());
            if (TryExpectSymbol(Symbol.Comma))
            {
                continue;
            }

            if (Peek().TryGetValue(out var nextToken) && nextToken is not SymbolToken { Symbol: Symbol.CloseParen })
            {
                _diagnostics.Add(Diagnostic
                    .Warning("Missing comma between function arguments")
                    .WithHelp("Add a ',' to separate arguments")
                    .At(nextToken)
                    .Build());
            }
        }

        return arguments;
    }
}
/// <summary>
/// Parses a brace-enclosed block of statements.
/// </summary>
/// <remarks>
/// A statement that fails to parse is recorded as a diagnostic, one token is
/// skipped and parsing continues. Running out of tokens before the closing
/// brace is reported as an error instead of being silently accepted.
/// </remarks>
/// <exception cref="ParseException">The block does not start with an opening brace.</exception>
private BlockSyntax ParseBlock()
{
    var startIndex = _tokenIndex;
    ExpectSymbol(Symbol.OpenBrace);

    List<StatementSyntax> statements = [];
    var closed = false;
    while (Peek().HasValue)
    {
        if (TryExpectSymbol(Symbol.CloseBrace))
        {
            closed = true;
            break;
        }

        try
        {
            statements.Add(ParseStatement());
        }
        catch (ParseException ex)
        {
            _diagnostics.Add(ex.Diagnostic);
            Next();
        }
    }

    if (!closed)
    {
        // _tokens is non-empty here: the opening brace was read from it.
        _diagnostics.Add(Diagnostic
            .Error("Unexpected end of file while parsing block")
            .WithHelp("Add a '}' to close the block")
            .At(_tokens.Last())
            .Build());
    }

    return new BlockSyntax(GetTokens(startIndex), statements);
}
/// <summary>
/// Parses a type: a built-in type name, a custom type (optionally written
/// 'namespace::name'), a pointer ('^type'), a function type
/// ('func(types): type') or an array ('[]type').
/// </summary>
/// <exception cref="ParseException">The next tokens do not form a type, or the input ended.</exception>
private NubType ParseType()
{
    if (TryExpectIdentifier(out var name))
    {
        return name.Value switch
        {
            "void" => new NubVoidType(),
            "string" => new NubStringType(),
            "cstring" => new NubCStringType(),
            "i64" => new NubPrimitiveType(PrimitiveTypeKind.I64),
            "i32" => new NubPrimitiveType(PrimitiveTypeKind.I32),
            "i16" => new NubPrimitiveType(PrimitiveTypeKind.I16),
            "i8" => new NubPrimitiveType(PrimitiveTypeKind.I8),
            "u64" => new NubPrimitiveType(PrimitiveTypeKind.U64),
            "u32" => new NubPrimitiveType(PrimitiveTypeKind.U32),
            "u16" => new NubPrimitiveType(PrimitiveTypeKind.U16),
            "u8" => new NubPrimitiveType(PrimitiveTypeKind.U8),
            "f64" => new NubPrimitiveType(PrimitiveTypeKind.F64),
            "f32" => new NubPrimitiveType(PrimitiveTypeKind.F32),
            "bool" => new NubPrimitiveType(PrimitiveTypeKind.Bool),
            _ => ParseCustomType(name.Value)
        };

        // Resolves a custom type. For 'a::b' the first identifier is the namespace
        // and the second the type name, matching how identifier expressions are
        // parsed; an unqualified name uses the current namespace.
        NubCustomType ParseCustomType(string firstIdentifier)
        {
            var @namespace = _namespace;
            var typeName = firstIdentifier;
            if (TryExpectSymbol(Symbol.DoubleColon))
            {
                @namespace = firstIdentifier;
                typeName = ExpectIdentifier().Value;
            }

            return new NubCustomType(@namespace, typeName);
        }
    }

    if (TryExpectSymbol(Symbol.Caret))
    {
        var baseType = ParseType();
        return new NubPointerType(baseType);
    }

    if (TryExpectSymbol(Symbol.Func))
    {
        ExpectSymbol(Symbol.OpenParen);
        List<NubType> parameters = [];
        while (!TryExpectSymbol(Symbol.CloseParen))
        {
            var parameter = ParseType();
            parameters.Add(parameter);
            // No comma: only warn when the next token is not the closing parenthesis.
            if (!TryExpectSymbol(Symbol.Comma) && Peek().TryGetValue(out var nextToken) && nextToken is not SymbolToken { Symbol: Symbol.CloseParen })
            {
                _diagnostics.Add(Diagnostic
                    .Warning("Missing comma between func type arguments")
                    .WithHelp("Add a ',' to separate arguments")
                    .At(nextToken)
                    .Build());
            }
        }

        var returnType = TryExpectSymbol(Symbol.Colon) ? ParseType() : new NubVoidType();
        return new NubFuncType(returnType, parameters);
    }

    if (TryExpectSymbol(Symbol.OpenBracket))
    {
        ExpectSymbol(Symbol.CloseBracket);
        var baseType = ParseType();
        return new NubArrayType(baseType);
    }

    if (!Peek().TryGetValue(out var peekToken))
    {
        throw new ParseException(Diagnostic
            .Error("Unexpected end of file while parsing type")
            .WithHelp("Expected a type name")
            .At(_tokens.Last())
            .Build());
    }

    throw new ParseException(Diagnostic
        .Error("Invalid type Syntax")
        .WithHelp("Expected type name, '^' for pointer, or '[]' for array")
        .At(peekToken)
        .Build());
}
/// <summary>
/// Consumes and returns the next token.
/// </summary>
/// <exception cref="ParseException">There are no tokens left.</exception>
private Token ExpectToken()
{
    var current = Peek();
    if (current.TryGetValue(out var token))
    {
        Next();
        return token;
    }

    throw new ParseException(Diagnostic
        .Error("Unexpected end of file")
        .WithHelp("Expected more tokens to complete the Syntax")
        .At(_tokens.Last())
        .Build());
}
/// <summary>
/// Consumes the next token and returns it as a symbol token.
/// </summary>
/// <exception cref="ParseException">The input ended or the token is not a symbol.</exception>
private SymbolToken ExpectSymbol()
{
    var token = ExpectToken();
    if (token is SymbolToken symbol)
    {
        return symbol;
    }

    throw new ParseException(Diagnostic
        .Error($"Expected symbol, but found {token.GetType().Name}")
        .WithHelp("This position requires a symbol like '(', ')', '{', '}', etc.")
        .At(token)
        .Build());
}
/// <summary>
/// Consumes the next token and requires it to be the symbol <paramref name="expectedSymbol"/>.
/// </summary>
/// <exception cref="ParseException">The input ended, the token is not a symbol, or it is a different symbol.</exception>
private void ExpectSymbol(Symbol expectedSymbol)
{
    var actual = ExpectSymbol();
    if (actual.Symbol == expectedSymbol)
    {
        return;
    }

    throw new ParseException(Diagnostic
        .Error($"Expected '{expectedSymbol}', but found '{actual.Symbol}'")
        .WithHelp($"Insert '{expectedSymbol}' here")
        .At(actual)
        .Build());
}
/// <summary>
/// Consumes the next token only if it is the given symbol.
/// </summary>
/// <returns>true when the symbol was consumed; otherwise false and the cursor is unchanged.</returns>
private bool TryExpectSymbol(Symbol symbol)
{
    var matches = Peek() is { Value: SymbolToken candidate } && candidate.Symbol == symbol;
    if (matches)
    {
        Next();
    }

    return matches;
}
/// <summary>
/// Consumes the next token only if it is an identifier.
/// </summary>
/// <param name="identifier">The consumed identifier, or null when the next token is not an identifier.</param>
/// <returns>true when an identifier was consumed; otherwise false and the cursor is unchanged.</returns>
private bool TryExpectIdentifier([NotNullWhen(true)] out IdentifierToken? identifier)
{
    if (Peek() is not { Value: IdentifierToken found })
    {
        identifier = null;
        return false;
    }

    identifier = found;
    Next();
    return true;
}
/// <summary>
/// Consumes the next token and returns it as an identifier token.
/// </summary>
/// <exception cref="ParseException">The input ended or the token is not an identifier.</exception>
private IdentifierToken ExpectIdentifier()
{
    var token = ExpectToken();
    if (token is IdentifierToken identifier)
    {
        return identifier;
    }

    throw new ParseException(Diagnostic
        .Error($"Expected identifier, but found {token.GetType().Name}")
        .WithHelp("Provide a valid identifier name here")
        .At(token)
        .Build());
}
/// <summary>
/// Skips tokens until the next one is a definition keyword (extern, func,
/// struct, trait or impl) or the input ends. The keyword is not consumed.
/// </summary>
private void RecoverToNextDefinition()
{
    while (Peek().TryGetValue(out var token) && !StartsDefinition(token))
    {
        Next();
    }

    static bool StartsDefinition(Token candidate)
    {
        return candidate is SymbolToken { Symbol: Symbol.Extern or Symbol.Func or Symbol.Struct or Symbol.Trait or Symbol.Impl };
    }
}
/// <summary>
/// Returns the token <paramref name="offset"/> positions ahead of the cursor without consuming it.
/// </summary>
/// <param name="offset">Distance from the current token; 0 is the next token to be consumed.</param>
/// <returns>The token, or an empty optional when the position lies outside the token list.</returns>
private Optional<Token> Peek(int offset = 0)
{
    var peekIndex = _tokenIndex + offset;

    // Use the list's own Count and indexer rather than LINQ: this is the hottest
    // path in the parser. Out-of-range positions, including negative ones, yield empty.
    if (peekIndex >= 0 && peekIndex < _tokens.Count)
    {
        return _tokens[peekIndex];
    }

    return Optional<Token>.Empty();
}
/// <summary>
/// Advances the cursor by one token.
/// </summary>
private void Next() => _tokenIndex++;
/// <summary>
/// Returns the tokens consumed since <paramref name="startIndex"/>, i.e. the
/// range from <paramref name="startIndex"/> up to but not including the cursor.
/// </summary>
/// <param name="startIndex">Index of the first token of the node being built.</param>
private List<Token> GetTokens(int startIndex)
{
    // The cursor is an exclusive end. Clamp it to Count (not Count - 1) so the last
    // token is included when parsing reaches the end of input; the clamp is still
    // needed because error recovery may advance the cursor past the end.
    var endIndex = Math.Min(_tokenIndex, _tokens.Count);
    var firstIndex = Math.Max(startIndex, 0);

    var tokens = new List<Token>(Math.Max(endIndex - firstIndex, 0));
    for (var i = firstIndex; i < endIndex; i++)
    {
        tokens.Add(_tokens[i]);
    }

    return tokens;
}
}
/// <summary>
/// Exception thrown on a syntax error; carries the diagnostic describing it.
/// The exception message is the diagnostic's message.
/// </summary>
public class ParseException : Exception
{
    /// <summary>The diagnostic describing the syntax error.</summary>
    public Diagnostic Diagnostic { get; }

    /// <summary>Creates the exception from <paramref name="diagnostic"/>.</summary>
    public ParseException(Diagnostic diagnostic) : base(diagnostic.Message) => Diagnostic = diagnostic;
}