This repository has been archived on 2025-10-24. You can view files and clone it, but cannot push or open issues or pull requests.
Files
nub-lang-archive-2/compiler/NubLang/Syntax/Parser.cs
nub31 e2da6cccff ...
2025-10-22 11:22:06 +02:00

878 lines
28 KiB
C#

using System.Diagnostics.CodeAnalysis;
using NubLang.Diagnostics;
namespace NubLang.Syntax;
public sealed class Parser
{
// Token stream currently being parsed; replaced at the start of every Parse call.
private List<Token> _tokens = [];
// Index into _tokens of the next token that has not been consumed yet.
private int _tokenIndex;
// Name read from the `module` header; ParseType uses it as the module of unqualified custom types.
private string _moduleName = string.Empty;
// Token under the cursor, or null once the cursor has moved past the last token.
private Token? CurrentToken => _tokenIndex < _tokens.Count ? _tokens[_tokenIndex] : null;
// True while at least one unconsumed token remains.
private bool HasToken => CurrentToken != null;
/// <summary>
/// Diagnostics collected while parsing. The list is cleared at the start of each <c>Parse</c> call.
/// </summary>
public List<Diagnostic> Diagnostics { get; } = [];
/// <summary>
/// Parses a token stream into a <see cref="SyntaxTree"/>: zero or more <c>import</c> headers,
/// one <c>module</c> header, then any number of <c>func</c> / <c>struct</c> definitions.
/// </summary>
/// <remarks>
/// Parse errors never escape this method; they are appended to <see cref="Diagnostics"/> and the
/// parser resynchronizes. All parser state, including <see cref="Diagnostics"/>, is reset on entry.
/// </remarks>
/// <param name="tokens">The tokens to parse.</param>
/// <returns>The definitions, module name and imports that could be parsed.</returns>
public SyntaxTree Parse(List<Token> tokens)
{
    Diagnostics.Clear();
    _tokens = tokens;
    _tokenIndex = 0;
    _moduleName = string.Empty;

    var imports = new List<string>();

    try
    {
        while (TryExpectSymbol(Symbol.Import))
        {
            imports.Add(ExpectStringLiteral().Value);
        }

        ExpectSymbol(Symbol.Module);
        _moduleName = ExpectStringLiteral().Value;
    }
    catch (ParseException e)
    {
        Diagnostics.Add(e.Diagnostic);

        // Skip ahead to the next header keyword. The header is not re-parsed: whatever follows is
        // handed to the definition loop below, which reports it and resynchronizes again.
        while (HasToken)
        {
            if (CurrentToken is SymbolToken { Symbol: Symbol.Module or Symbol.Import })
            {
                break;
            }

            Next();
        }
    }

    var definitions = new List<DefinitionSyntax>();

    // Every iteration consumes at least one token (either a matched keyword or the token taken by
    // ExpectSymbol), so the loop terminates even when recovery stops on the current token.
    while (HasToken)
    {
        try
        {
            var startIndex = _tokenIndex;
            var exported = TryExpectSymbol(Symbol.Export);

            if (TryExpectSymbol(Symbol.Extern))
            {
                var externSymbol = ExpectStringLiteral();
                ExpectSymbol(Symbol.Func);
                definitions.Add(ParseFunc(startIndex, exported, externSymbol.Value));
                continue;
            }

            var keyword = ExpectSymbol();
            DefinitionSyntax definition = keyword.Symbol switch
            {
                Symbol.Func => ParseFunc(startIndex, exported, null),
                Symbol.Struct => ParseStruct(startIndex, exported),
                _ => throw new ParseException(Diagnostic
                    .Error($"Expected 'func' or 'struct' but found '{keyword.Symbol}'")
                    .WithHelp("Valid definition keywords are 'func' and 'struct'")
                    .At(keyword)
                    .Build())
            };

            definitions.Add(definition);
        }
        catch (ParseException e)
        {
            Diagnostics.Add(e.Diagnostic);

            // Resynchronize on any token that can begin a definition. 'export' must be part of
            // this set: skipping past it would parse the following definition as non-exported.
            while (HasToken)
            {
                if (CurrentToken is SymbolToken { Symbol: Symbol.Export or Symbol.Extern or Symbol.Func or Symbol.Struct })
                {
                    break;
                }

                Next();
            }
        }
    }

    return new SyntaxTree(definitions, _moduleName, imports);
}
/// <summary>
/// Parses a single function parameter of the form <c>name: type</c>.
/// </summary>
/// <returns>The parameter node, covering every token from the name through the type.</returns>
private FuncParameterSyntax ParseFuncParameter()
{
    var firstTokenIndex = _tokenIndex;

    var parameterName = ExpectIdentifier().Value;
    ExpectSymbol(Symbol.Colon);
    var parameterType = ParseType();

    return new FuncParameterSyntax(GetTokens(firstTokenIndex), parameterName, parameterType);
}
/// <summary>
/// Parses a function after its <c>func</c> keyword has been consumed: name, parameter list,
/// optional <c>: returnType</c> and optional body block.
/// </summary>
/// <param name="startIndex">Index of the first token of the definition (including any modifiers).</param>
/// <param name="exported">Whether the definition was preceded by <c>export</c>.</param>
/// <param name="externSymbol">The extern symbol name, or null for a regular function.</param>
/// <returns>The function node; its body is null when no <c>{</c> follows the prototype.</returns>
private FuncSyntax ParseFunc(int startIndex, bool exported, string? externSymbol)
{
    var funcName = ExpectIdentifier().Value;
    ExpectSymbol(Symbol.OpenParen);

    // Comma-separated parameters; a trailing comma before ')' is accepted.
    var parameters = new List<FuncParameterSyntax>();
    var listClosed = TryExpectSymbol(Symbol.CloseParen);
    while (!listClosed)
    {
        parameters.Add(ParseFuncParameter());

        if (TryExpectSymbol(Symbol.Comma))
        {
            listClosed = TryExpectSymbol(Symbol.CloseParen);
        }
        else
        {
            ExpectSymbol(Symbol.CloseParen);
            listClosed = true;
        }
    }

    // A missing return type is represented by a void type that owns no tokens.
    TypeSyntax returnType;
    if (TryExpectSymbol(Symbol.Colon))
    {
        returnType = ParseType();
    }
    else
    {
        returnType = new VoidTypeSyntax([]);
    }

    var prototype = new FuncPrototypeSyntax(GetTokens(startIndex), funcName, exported, externSymbol, parameters, returnType);

    var bodyStartIndex = _tokenIndex;
    BlockSyntax? body = TryExpectSymbol(Symbol.OpenBrace) ? ParseBlock(bodyStartIndex) : null;

    return new FuncSyntax(GetTokens(startIndex), prototype, body);
}
/// <summary>
/// Parses a struct after its <c>struct</c> keyword has been consumed: a name followed by a
/// brace-delimited list of <c>name: type</c> fields, each with an optional <c>= value</c> default.
/// </summary>
/// <param name="startIndex">Index of the first token of the definition (including any modifiers).</param>
/// <param name="exported">Whether the definition was preceded by <c>export</c>.</param>
/// <returns>The struct node with its fields in source order.</returns>
private StructSyntax ParseStruct(int startIndex, bool exported)
{
    var structName = ExpectIdentifier().Value;
    ExpectSymbol(Symbol.OpenBrace);

    var fields = new List<StructFieldSyntax>();
    while (true)
    {
        if (TryExpectSymbol(Symbol.CloseBrace))
        {
            break;
        }

        var fieldStartIndex = _tokenIndex;
        var fieldName = ExpectIdentifier().Value;
        ExpectSymbol(Symbol.Colon);
        var fieldType = ParseType();

        ExpressionSyntax? defaultValue = TryExpectSymbol(Symbol.Assign) ? ParseExpression() : null;

        fields.Add(new StructFieldSyntax(GetTokens(fieldStartIndex), fieldName, fieldType, defaultValue));
    }

    return new StructSyntax(GetTokens(startIndex), structName, exported, fields);
}
/// <summary>
/// Parses one statement: a block, <c>return</c>, <c>if</c>, <c>while</c>, <c>let</c>,
/// <c>defer</c>, <c>break</c>, <c>continue</c>, an assignment, or an expression statement.
/// </summary>
/// <remarks>
/// The leading symbol is only consumed when it is a statement keyword. Any other leading symbol
/// (for example <c>(</c>, <c>-</c>, <c>!</c>, <c>[</c>, <c>@</c> or <c>struct</c>) is left in
/// place so that the expression parser sees it. Consuming it unconditionally would silently drop
/// the first token of such a statement, and would swallow stray symbols without any diagnostic.
/// </remarks>
/// <returns>The parsed statement.</returns>
/// <exception cref="ParseException">The tokens do not form a valid statement.</exception>
private StatementSyntax ParseStatement()
{
    var startIndex = _tokenIndex;

    // Peek first; commit (Next) only inside a case that actually handles the symbol.
    if (CurrentToken is SymbolToken { Symbol: var symbol })
    {
        switch (symbol)
        {
            case Symbol.OpenBrace:
                Next();
                return ParseBlock(startIndex);
            case Symbol.Return:
                Next();
                return ParseReturn(startIndex);
            case Symbol.If:
                Next();
                return ParseIf(startIndex);
            case Symbol.While:
                Next();
                return ParseWhile(startIndex);
            case Symbol.Let:
                Next();
                return ParseVariableDeclaration(startIndex);
            case Symbol.Defer:
                Next();
                return ParseDefer(startIndex);
            case Symbol.Break:
                Next();
                return new BreakSyntax(GetTokens(startIndex));
            case Symbol.Continue:
                Next();
                return new ContinueSyntax(GetTokens(startIndex));
        }
    }

    var expr = ParseExpression();

    // `target = value` is an assignment; anything else is a bare expression statement.
    if (TryExpectSymbol(Symbol.Assign))
    {
        var value = ParseExpression();
        return new AssignmentSyntax(GetTokens(startIndex), expr, value);
    }

    return new StatementExpressionSyntax(GetTokens(startIndex), expr);
}
/// <summary>
/// Parses a variable declaration after its <c>let</c> keyword has been consumed:
/// a name, an optional <c>: type</c> and an optional <c>= value</c>.
/// </summary>
/// <param name="startIndex">Index of the <c>let</c> token.</param>
/// <returns>The declaration node; the type and the initializer are null when omitted.</returns>
private VariableDeclarationSyntax ParseVariableDeclaration(int startIndex)
{
    var variableName = ExpectIdentifier().Value;

    TypeSyntax? declaredType = TryExpectSymbol(Symbol.Colon) ? ParseType() : null;
    ExpressionSyntax? initializer = TryExpectSymbol(Symbol.Assign) ? ParseExpression() : null;

    return new VariableDeclarationSyntax(GetTokens(startIndex), variableName, declaredType, initializer);
}
/// <summary>
/// Parses the statement that follows an already-consumed <c>defer</c> keyword.
/// </summary>
/// <param name="startIndex">Index of the <c>defer</c> token.</param>
/// <returns>The defer node wrapping the deferred statement.</returns>
private DeferSyntax ParseDefer(int startIndex)
{
    // The statement must be parsed before the token span is taken so the span includes it.
    var deferred = ParseStatement();
    var tokens = GetTokens(startIndex);
    return new DeferSyntax(tokens, deferred);
}
/// <summary>
/// Parses the remainder of a <c>return</c> statement whose keyword has been consumed.
/// A directly following <c>;</c> means "no value"; otherwise an expression is required.
/// </summary>
/// <param name="startIndex">Index of the <c>return</c> token.</param>
/// <returns>The return node; its value is null for a bare <c>return;</c>.</returns>
private ReturnSyntax ParseReturn(int startIndex)
{
    var returnsValue = !TryExpectSymbol(Symbol.Semi);
    ExpressionSyntax? value = returnsValue ? ParseExpression() : null;

    return new ReturnSyntax(GetTokens(startIndex), value);
}
/// <summary>
/// Parses an <c>if</c> statement after its keyword has been consumed: a condition, a block,
/// and an optional <c>else</c> that is either another <c>if</c> or a block.
/// </summary>
/// <param name="startIndex">Index of the first token belonging to this statement.</param>
/// <returns>The if node; the else branch is null when there is no <c>else</c>.</returns>
private IfSyntax ParseIf(int startIndex)
{
    var condition = ParseExpression();
    var thenBlock = ParseBlock();

    Variant<IfSyntax, BlockSyntax>? elseBranch = null;

    // Captured before 'else' so that a chained 'else if' node also covers the 'else' token.
    var elseStartIndex = _tokenIndex;
    if (TryExpectSymbol(Symbol.Else))
    {
        elseBranch = TryExpectSymbol(Symbol.If)
            ? (Variant<IfSyntax, BlockSyntax>)ParseIf(elseStartIndex)
            : (Variant<IfSyntax, BlockSyntax>)ParseBlock();
    }

    return new IfSyntax(GetTokens(startIndex), condition, thenBlock, elseBranch);
}
/// <summary>
/// Parses a <c>while</c> loop after its keyword has been consumed: a condition and a block.
/// </summary>
/// <param name="startIndex">Index of the <c>while</c> token.</param>
/// <returns>The while node.</returns>
private WhileSyntax ParseWhile(int startIndex)
{
    var loopCondition = ParseExpression();
    var loopBody = ParseBlock();

    var tokens = GetTokens(startIndex);
    return new WhileSyntax(tokens, loopCondition, loopBody);
}
/// <summary>
/// Parses an expression made of primary expressions joined by binary operators.
/// </summary>
/// <remarks>
/// Only operators whose precedence is at least <paramref name="precedence"/> are folded in.
/// The right operand is parsed with the operator's precedence plus one, so operators of equal
/// precedence group to the left.
/// </remarks>
/// <param name="precedence">Minimum operator precedence this call may consume.</param>
/// <returns>The parsed expression.</returns>
private ExpressionSyntax ParseExpression(int precedence = 0)
{
    var startIndex = _tokenIndex;
    var result = ParsePrimaryExpression();

    while (true)
    {
        if (CurrentToken is not SymbolToken candidate)
        {
            break;
        }

        if (!TryGetBinaryOperator(candidate.Symbol, out var op))
        {
            break;
        }

        var binaryOperator = op.Value;
        var operatorPrecedence = GetBinaryOperatorPrecedence(binaryOperator);
        if (operatorPrecedence < precedence)
        {
            break;
        }

        Next();
        var right = ParseExpression(operatorPrecedence + 1);
        result = new BinaryExpressionSyntax(GetTokens(startIndex), result, binaryOperator, right);
    }

    return result;
}
/// <summary>
/// Returns the binding strength of a binary operator; a larger number binds tighter.
/// </summary>
/// <param name="operatorSyntax">The operator to look up.</param>
/// <returns>A value from 2 (logical or) to 10 (multiply, divide, modulo).</returns>
/// <exception cref="ArgumentOutOfRangeException">The operator has no entry in the table.</exception>
private static int GetBinaryOperatorPrecedence(BinaryOperatorSyntax operatorSyntax) => operatorSyntax switch
{
    BinaryOperatorSyntax.Multiply
        or BinaryOperatorSyntax.Divide
        or BinaryOperatorSyntax.Modulo => 10,

    BinaryOperatorSyntax.Plus
        or BinaryOperatorSyntax.Minus => 9,

    BinaryOperatorSyntax.LeftShift
        or BinaryOperatorSyntax.RightShift => 8,

    BinaryOperatorSyntax.GreaterThan
        or BinaryOperatorSyntax.GreaterThanOrEqual
        or BinaryOperatorSyntax.LessThan
        or BinaryOperatorSyntax.LessThanOrEqual
        or BinaryOperatorSyntax.Equal
        or BinaryOperatorSyntax.NotEqual => 7,

    BinaryOperatorSyntax.BitwiseAnd => 6,
    BinaryOperatorSyntax.BitwiseXor => 5,
    BinaryOperatorSyntax.BitwiseOr => 4,
    BinaryOperatorSyntax.LogicalAnd => 3,
    BinaryOperatorSyntax.LogicalOr => 2,

    _ => throw new ArgumentOutOfRangeException(nameof(operatorSyntax), operatorSyntax, null)
};
/// <summary>
/// Maps a symbol to the binary operator it denotes, if any.
/// </summary>
/// <param name="symbol">The symbol to classify.</param>
/// <param name="binaryExpressionOperator">The matching operator, or null when there is none.</param>
/// <returns>True when <paramref name="symbol"/> is a binary operator.</returns>
private bool TryGetBinaryOperator(Symbol symbol, [NotNullWhen(true)] out BinaryOperatorSyntax? binaryExpressionOperator)
{
    binaryExpressionOperator = symbol switch
    {
        // Comparison
        Symbol.Equal => BinaryOperatorSyntax.Equal,
        Symbol.NotEqual => BinaryOperatorSyntax.NotEqual,
        Symbol.LessThan => BinaryOperatorSyntax.LessThan,
        Symbol.LessThanOrEqual => BinaryOperatorSyntax.LessThanOrEqual,
        Symbol.GreaterThan => BinaryOperatorSyntax.GreaterThan,
        Symbol.GreaterThanOrEqual => BinaryOperatorSyntax.GreaterThanOrEqual,

        // Logical
        Symbol.And => BinaryOperatorSyntax.LogicalAnd,
        Symbol.Or => BinaryOperatorSyntax.LogicalOr,

        // Arithmetic
        Symbol.Plus => BinaryOperatorSyntax.Plus,
        Symbol.Minus => BinaryOperatorSyntax.Minus,
        Symbol.Star => BinaryOperatorSyntax.Multiply,
        Symbol.ForwardSlash => BinaryOperatorSyntax.Divide,
        Symbol.Percent => BinaryOperatorSyntax.Modulo,

        // Bitwise
        Symbol.LeftShift => BinaryOperatorSyntax.LeftShift,
        Symbol.RightShift => BinaryOperatorSyntax.RightShift,
        Symbol.Ampersand => BinaryOperatorSyntax.BitwiseAnd,
        Symbol.Pipe => BinaryOperatorSyntax.BitwiseOr,
        Symbol.Caret => BinaryOperatorSyntax.BitwiseXor,

        _ => null
    };

    return binaryExpressionOperator.HasValue;
}
/// <summary>
/// Parses a primary expression (literal, identifier, parenthesized expression, unary operation,
/// array or struct initializer, or <c>@builtin</c> call) followed by any postfix operators.
/// </summary>
/// <remarks>
/// Unary operands and anonymous struct-initializer bodies are parsed before the node's token span
/// is captured. C# evaluates constructor arguments left to right, so calling
/// <c>GetTokens(startIndex)</c> in the same argument list as the sub-parse would capture only the
/// leading token of the node.
/// </remarks>
/// <returns>The parsed expression.</returns>
/// <exception cref="ParseException">The next token cannot start an expression.</exception>
private ExpressionSyntax ParsePrimaryExpression()
{
    var startIndex = _tokenIndex;
    var token = ExpectToken();

    ExpressionSyntax expr = token switch
    {
        BoolLiteralToken boolLiteral => new BoolLiteralSyntax(GetTokens(startIndex), boolLiteral.Value),
        StringLiteralToken stringLiteral => new StringLiteralSyntax(GetTokens(startIndex), stringLiteral.Value),
        FloatLiteralToken floatLiteral => new FloatLiteralSyntax(GetTokens(startIndex), floatLiteral.Value),
        IntLiteralToken intLiteral => new IntLiteralSyntax(GetTokens(startIndex), intLiteral.Value, intLiteral.Base),
        IdentifierToken identifier => ParseIdentifier(startIndex, identifier),
        SymbolToken symbolToken => symbolToken.Symbol switch
        {
            Symbol.OpenParen => ParseParenthesizedExpression(),
            Symbol.Minus => ParseUnary(UnaryOperatorSyntax.Negate),
            Symbol.Bang => ParseUnary(UnaryOperatorSyntax.Invert),
            Symbol.OpenBracket => ParseArrayInitializer(startIndex),
            Symbol.OpenBrace => ParseAnonymousStructInitializer(),
            Symbol.Struct => ParseStructInitializer(startIndex),
            Symbol.At => ParseBuiltinFunction(startIndex),
            _ => throw new ParseException(Diagnostic
                .Error($"Unexpected symbol '{symbolToken.Symbol}' in expression")
                .WithHelp("Expected '(', '-', '!', '[', '{', 'struct' or '@'")
                .At(symbolToken)
                .Build())
        },
        _ => throw new ParseException(Diagnostic
            .Error($"Unexpected token '{token.GetType().Name}' in expression")
            .WithHelp("Expected literal, identifier, or parenthesized expression")
            .At(token)
            .Build())
    };

    return ParsePostfixOperators(expr);

    // Parses the operand first so the node's tokens span the operator and the operand.
    ExpressionSyntax ParseUnary(UnaryOperatorSyntax unaryOperator)
    {
        var operand = ParsePrimaryExpression();
        return new UnaryExpressionSyntax(GetTokens(startIndex), unaryOperator, operand);
    }

    // Parses `{ ... }` without a type; the body comes first so the span reaches the closing brace.
    ExpressionSyntax ParseAnonymousStructInitializer()
    {
        var initializers = ParseStructInitializerBody();
        return new StructInitializerSyntax(GetTokens(startIndex), null, initializers);
    }
}
/// <summary>
/// Parses a builtin call after its <c>@</c> has been consumed. Supported builtins are
/// <c>size(type)</c>, <c>interpret(type, expr)</c> and <c>floatToInt(type, expr)</c>.
/// </summary>
/// <param name="startIndex">Index of the <c>@</c> token.</param>
/// <returns>The node for the recognized builtin.</returns>
/// <exception cref="ParseException">The builtin name is unknown or its arguments are malformed.</exception>
private ExpressionSyntax ParseBuiltinFunction(int startIndex)
{
    var name = ExpectIdentifier();

    // The opening parenthesis is required before the name is validated.
    ExpectSymbol(Symbol.OpenParen);

    if (name.Value == "size")
    {
        var measuredType = ParseType();
        ExpectSymbol(Symbol.CloseParen);
        return new SizeBuiltinSyntax(GetTokens(startIndex), measuredType);
    }

    var isInterpret = name.Value == "interpret";
    if (!isInterpret && name.Value != "floatToInt")
    {
        throw new ParseException(Diagnostic.Error($"Unknown builtin {name.Value}").At(name).Build());
    }

    // Both remaining builtins share the `(type, expression)` argument shape.
    var targetType = ParseType();
    ExpectSymbol(Symbol.Comma);
    var operand = ParseExpression();
    ExpectSymbol(Symbol.CloseParen);

    if (isInterpret)
    {
        return new InterpretBuiltinSyntax(GetTokens(startIndex), targetType, operand);
    }

    return new FloatToIntBuiltinSyntax(GetTokens(startIndex), targetType, operand);
}
/// <summary>
/// Turns an already-consumed identifier into an expression: <c>module::name</c> when a
/// <c>::</c> follows, otherwise a local identifier.
/// </summary>
/// <param name="startIndex">Index of <paramref name="identifier"/> in the token stream.</param>
/// <param name="identifier">The identifier token that was just consumed.</param>
/// <returns>A module-qualified or local identifier node.</returns>
private ExpressionSyntax ParseIdentifier(int startIndex, IdentifierToken identifier)
{
    if (!TryExpectSymbol(Symbol.DoubleColon))
    {
        return new LocalIdentifierSyntax(GetTokens(startIndex), identifier.Value);
    }

    // `identifier` was the module part; the next identifier is the member name.
    var memberName = ExpectIdentifier().Value;
    return new ModuleIdentifierSyntax(GetTokens(startIndex), identifier.Value, memberName);
}
/// <summary>
/// Parses the inside of a parenthesized expression whose <c>(</c> has been consumed and
/// requires the closing <c>)</c>.
/// </summary>
/// <returns>The inner expression itself; no wrapper node is created for the parentheses.</returns>
private ExpressionSyntax ParseParenthesizedExpression()
{
    var inner = ParseExpression();
    ExpectSymbol(Symbol.CloseParen);

    return inner;
}
/// <summary>
/// Repeatedly wraps <paramref name="expr"/> in postfix operations for as long as one follows:
/// address-of, dereference, <c>.member</c>, <c>[index]</c> and <c>(arguments)</c>.
/// </summary>
/// <param name="expr">The expression the postfix operators apply to.</param>
/// <returns>The outermost resulting expression, or <paramref name="expr"/> if none follow.</returns>
private ExpressionSyntax ParsePostfixOperators(ExpressionSyntax expr)
{
    // NOTE(review): this index is taken after the operand has already been parsed, so the token
    // spans built below cover only the postfix tokens, not the operand. Confirm this is intended.
    var startIndex = _tokenIndex;

    // NOTE(review): Ampersand and Caret are consumed here unconditionally, so the BitwiseAnd and
    // BitwiseXor mappings in TryGetBinaryOperator look unreachable after a primary expression.
    while (HasToken)
    {
        if (TryExpectSymbol(Symbol.Ampersand))
        {
            expr = new AddressOfSyntax(GetTokens(startIndex), expr);
        }
        else if (TryExpectSymbol(Symbol.Caret))
        {
            expr = new DereferenceSyntax(GetTokens(startIndex), expr);
        }
        else if (TryExpectSymbol(Symbol.Period))
        {
            var memberName = ExpectIdentifier().Value;
            expr = new MemberAccessSyntax(GetTokens(startIndex), expr, memberName);
        }
        else if (TryExpectSymbol(Symbol.OpenBracket))
        {
            var indexExpression = ParseExpression();
            ExpectSymbol(Symbol.CloseBracket);
            expr = new ArrayIndexAccessSyntax(GetTokens(startIndex), expr, indexExpression);
        }
        else if (TryExpectSymbol(Symbol.OpenParen))
        {
            // Comma-separated arguments; a trailing comma before ')' is accepted.
            var arguments = new List<ExpressionSyntax>();
            var listClosed = TryExpectSymbol(Symbol.CloseParen);
            while (!listClosed)
            {
                arguments.Add(ParseExpression());

                if (TryExpectSymbol(Symbol.Comma))
                {
                    listClosed = TryExpectSymbol(Symbol.CloseParen);
                }
                else
                {
                    ExpectSymbol(Symbol.CloseParen);
                    listClosed = true;
                }
            }

            expr = new FuncCallSyntax(GetTokens(startIndex), expr, arguments);
        }
        else
        {
            break;
        }
    }

    return expr;
}
/// <summary>
/// Parses an array initializer of the form <c>[capacity]type</c> after its <c>[</c> has been
/// consumed.
/// </summary>
/// <param name="startIndex">Index of the opening <c>[</c> token.</param>
/// <returns>The array initializer node.</returns>
private ExpressionSyntax ParseArrayInitializer(int startIndex)
{
    var capacityExpression = ParseExpression();
    ExpectSymbol(Symbol.CloseBracket);
    var elementType = ParseType();

    var tokens = GetTokens(startIndex);
    return new ArrayInitializerSyntax(tokens, capacityExpression, elementType);
}
/// <summary>
/// Parses a struct initializer after its <c>struct</c> keyword has been consumed: an optional
/// type followed by a brace-delimited initializer body.
/// </summary>
/// <param name="startIndex">Index of the <c>struct</c> token.</param>
/// <returns>The initializer node; its type is null when <c>{</c> follows the keyword directly.</returns>
private StructInitializerSyntax ParseStructInitializer(int startIndex)
{
    TypeSyntax? explicitType;
    if (TryExpectSymbol(Symbol.OpenBrace))
    {
        explicitType = null;
    }
    else
    {
        explicitType = ParseType();
        ExpectSymbol(Symbol.OpenBrace);
    }

    var fieldInitializers = ParseStructInitializerBody();
    return new StructInitializerSyntax(GetTokens(startIndex), explicitType, fieldInitializers);
}
/// <summary>
/// Parses <c>name = expression</c> pairs up to and including the closing <c>}</c> of a struct
/// initializer whose opening brace has been consumed.
/// </summary>
/// <remarks>
/// A field that is initialized more than once is reported through <c>Diagnostics</c> and the
/// first value is kept. Using <c>Dictionary.Add</c> here would throw an
/// <see cref="ArgumentException"/> on the duplicate key, which none of the parser's
/// <see cref="ParseException"/> handlers catch.
/// </remarks>
/// <returns>The initializer expressions keyed by field name.</returns>
/// <exception cref="ParseException">An entry is malformed or the closing brace is missing.</exception>
private Dictionary<string, ExpressionSyntax> ParseStructInitializerBody()
{
    Dictionary<string, ExpressionSyntax> initializers = [];

    while (!TryExpectSymbol(Symbol.CloseBrace))
    {
        var name = ExpectIdentifier();
        ExpectSymbol(Symbol.Assign);
        var value = ParseExpression();

        // Report instead of throwing so the rest of the initializer is still parsed normally.
        if (!initializers.TryAdd(name.Value, value))
        {
            Diagnostics.Add(Diagnostic
                .Error($"Field '{name.Value}' is initialized more than once")
                .WithHelp("Remove the duplicate initializer")
                .At(name)
                .Build());
        }
    }

    return initializers;
}
/// <summary>
/// Parses a block including its opening <c>{</c>.
/// </summary>
/// <returns>The block node, spanning both braces.</returns>
/// <exception cref="ParseException">The next token is not <c>{</c>.</exception>
private BlockSyntax ParseBlock()
{
    var openBraceIndex = _tokenIndex;
    ExpectSymbol(Symbol.OpenBrace);

    return ParseBlock(openBraceIndex);
}
/// <summary>
/// Parses statements up to and including the closing <c>}</c> of a block whose opening brace
/// has been consumed.
/// </summary>
/// <remarks>
/// A statement that fails to parse is recorded in <c>Diagnostics</c>; one further token is then
/// skipped and parsing continues. If the input ends first, the block is closed with whatever
/// statements were collected.
/// </remarks>
/// <param name="startIndex">Index of the first token that belongs to the block node.</param>
/// <returns>The block node with the statements that parsed successfully.</returns>
private BlockSyntax ParseBlock(int startIndex)
{
    var statements = new List<StatementSyntax>();

    while (!TryExpectSymbol(Symbol.CloseBrace))
    {
        try
        {
            var statement = ParseStatement();
            statements.Add(statement);
        }
        catch (ParseException ex)
        {
            Diagnostics.Add(ex.Diagnostic);

            if (!HasToken)
            {
                break;
            }

            Next();
        }
    }

    return new BlockSyntax(GetTokens(startIndex), statements);
}
/// <summary>
/// Parses a type: a sized primitive (<c>u8</c>..<c>u64</c>, <c>i8</c>..<c>i64</c>, <c>f32</c>,
/// <c>f64</c>), <c>void</c>, <c>string</c>, <c>cstring</c>, <c>bool</c>, a custom type with an
/// optional <c>module::</c> prefix, a <c>^</c> pointer, a <c>func(...)</c> type, or a bracketed
/// array form (<c>[N]T</c>, <c>[?]T</c>, <c>[]T</c>).
/// </summary>
/// <returns>The parsed type node.</returns>
/// <exception cref="ParseException">
/// The tokens do not form a type, or a sized primitive uses an unsupported bit width.
/// </exception>
private TypeSyntax ParseType()
{
    var startIndex = _tokenIndex;

    if (TryExpectIdentifier(out var name))
    {
        var text = name.Value;
        var prefix = text[0];

        // A 'u', 'i' or 'f' followed by an integer is a sized primitive type.
        if ((prefix is 'u' or 'i' or 'f') && int.TryParse(text[1..], out var bits))
        {
            switch (prefix)
            {
                case 'u' when bits is 8 or 16 or 32 or 64:
                    return new IntTypeSyntax(GetTokens(startIndex), false, bits);
                case 'u':
                    throw new ParseException(Diagnostic
                        .Error("Arbitrary uint size is not supported")
                        .WithHelp("Use u8, u16, u32 or u64")
                        .At(name)
                        .Build());
                case 'i' when bits is 8 or 16 or 32 or 64:
                    return new IntTypeSyntax(GetTokens(startIndex), true, bits);
                case 'i':
                    throw new ParseException(Diagnostic
                        .Error("Arbitrary int size is not supported")
                        .WithHelp("Use i8, i16, i32 or i64")
                        .At(name)
                        .Build());
                case 'f' when bits is 32 or 64:
                    return new FloatTypeSyntax(GetTokens(startIndex), bits);
                case 'f':
                    throw new ParseException(Diagnostic
                        .Error("Arbitrary float size is not supported")
                        .WithHelp("Use f32 or f64")
                        .At(name)
                        .Build());
            }
        }

        if (text == "void")
        {
            return new VoidTypeSyntax(GetTokens(startIndex));
        }

        if (text == "string")
        {
            return new StringTypeSyntax(GetTokens(startIndex));
        }

        if (text == "cstring")
        {
            return new CStringTypeSyntax(GetTokens(startIndex));
        }

        if (text == "bool")
        {
            return new BoolTypeSyntax(GetTokens(startIndex));
        }

        // Custom type: unqualified names belong to the module currently being parsed.
        if (!TryExpectSymbol(Symbol.DoubleColon))
        {
            return new CustomTypeSyntax(GetTokens(startIndex), _moduleName, text);
        }

        // `module::Type` - the first identifier was the module name.
        var qualifiedName = ExpectIdentifier();
        return new CustomTypeSyntax(GetTokens(startIndex), text, qualifiedName.Value);
    }

    if (TryExpectSymbol(Symbol.Caret))
    {
        var pointee = ParseType();
        return new PointerTypeSyntax(GetTokens(startIndex), pointee);
    }

    if (TryExpectSymbol(Symbol.Func))
    {
        ExpectSymbol(Symbol.OpenParen);

        // Comma-separated parameter types; a trailing comma before ')' is accepted.
        var parameterTypes = new List<TypeSyntax>();
        var listClosed = TryExpectSymbol(Symbol.CloseParen);
        while (!listClosed)
        {
            parameterTypes.Add(ParseType());

            if (TryExpectSymbol(Symbol.Comma))
            {
                listClosed = TryExpectSymbol(Symbol.CloseParen);
            }
            else
            {
                ExpectSymbol(Symbol.CloseParen);
                listClosed = true;
            }
        }

        // A missing return type is represented by a void type that owns no tokens.
        TypeSyntax returnType;
        if (TryExpectSymbol(Symbol.Colon))
        {
            returnType = ParseType();
        }
        else
        {
            returnType = new VoidTypeSyntax([]);
        }

        return new FuncTypeSyntax(GetTokens(startIndex), parameterTypes, returnType);
    }

    if (TryExpectSymbol(Symbol.OpenBracket))
    {
        // `[N]T` - array with a length given by an integer literal.
        if (TryExpectIntLiteral(out var lengthLiteral))
        {
            ExpectSymbol(Symbol.CloseBracket);
            var constElementType = ParseType();
            return new ConstArrayTypeSyntax(GetTokens(startIndex), constElementType, Convert.ToInt64(lengthLiteral.Value, lengthLiteral.Base));
        }

        // `[?]T` produces an array type, `[]T` a slice type.
        var hasQuestionMark = TryExpectSymbol(Symbol.QuestionMark);
        ExpectSymbol(Symbol.CloseBracket);
        var elementType = ParseType();

        if (hasQuestionMark)
        {
            return new ArrayTypeSyntax(GetTokens(startIndex), elementType);
        }

        return new SliceTypeSyntax(GetTokens(startIndex), elementType);
    }

    throw new ParseException(Diagnostic
        .Error("Invalid type syntax")
        .WithHelp("Expected type name, '^' for pointer, or '[]' for array")
        .At(CurrentToken)
        .Build());
}
/// <summary>
/// Consumes and returns the current token.
/// </summary>
/// <returns>The token that was under the cursor.</returns>
/// <exception cref="ParseException">There are no tokens left.</exception>
private Token ExpectToken()
{
    if (CurrentToken is not { } token)
    {
        // Anchor the diagnostic at the last token when there is one. Indexing with [^1] would
        // throw ArgumentOutOfRangeException for an empty token list (for example an empty source
        // file), and that exception is not caught by any of the parser's recovery handlers.
        // NOTE(review): ParseType also hands a possibly-null token to At, so At presumably
        // tolerates null - confirm against the Diagnostic builder.
        throw new ParseException(Diagnostic
            .Error("Unexpected end of file")
            .WithHelp("Expected more tokens to complete the syntax")
            .At(_tokens.LastOrDefault())
            .Build());
    }

    Next();
    return token;
}
/// <summary>
/// Consumes the current token and requires it to be a symbol.
/// </summary>
/// <returns>The consumed symbol token.</returns>
/// <exception cref="ParseException">The input ended or the token is not a symbol.</exception>
private SymbolToken ExpectSymbol()
{
    var token = ExpectToken();
    if (token is SymbolToken symbol)
    {
        return symbol;
    }

    // The offending token has already been consumed at this point.
    throw new ParseException(Diagnostic
        .Error($"Expected symbol, but found {token.GetType().Name}")
        .WithHelp("This position requires a symbol like '(', ')', '{', '}', etc.")
        .At(token)
        .Build());
}
/// <summary>
/// Consumes the current token and requires it to be the given symbol.
/// </summary>
/// <param name="expectedSymbol">The symbol that must appear next.</param>
/// <exception cref="ParseException">
/// The input ended, the token is not a symbol, or it is a different symbol.
/// </exception>
private void ExpectSymbol(Symbol expectedSymbol)
{
    var token = ExpectSymbol();
    if (token.Symbol == expectedSymbol)
    {
        return;
    }

    throw new ParseException(Diagnostic
        .Error($"Expected '{expectedSymbol}', but found '{token.Symbol}'")
        .WithHelp($"Insert '{expectedSymbol}' here")
        .At(token)
        .Build());
}
/// <summary>
/// Consumes the current token if it is a symbol of any kind.
/// </summary>
/// <param name="symbol">The consumed symbol, or the default <c>Symbol</c> value when nothing was consumed.</param>
/// <returns>True when a symbol token was consumed; otherwise false and the cursor is unchanged.</returns>
private bool TryExpectSymbol(out Symbol symbol)
{
    if (CurrentToken is not SymbolToken symbolToken)
    {
        symbol = default;
        return false;
    }

    symbol = symbolToken.Symbol;
    Next();
    return true;
}
/// <summary>
/// Consumes the current token if it is exactly the given symbol.
/// </summary>
/// <param name="symbol">The symbol to match.</param>
/// <returns>True when the symbol was consumed; otherwise false and the cursor is unchanged.</returns>
private bool TryExpectSymbol(Symbol symbol)
{
    if (CurrentToken is not SymbolToken symbolToken || symbolToken.Symbol != symbol)
    {
        return false;
    }

    Next();
    return true;
}
/// <summary>
/// Consumes the current token if it is an identifier.
/// </summary>
/// <param name="identifier">The consumed identifier token, or null when nothing was consumed.</param>
/// <returns>True when an identifier was consumed; otherwise false and the cursor is unchanged.</returns>
private bool TryExpectIdentifier([NotNullWhen(true)] out IdentifierToken? identifier)
{
    identifier = CurrentToken as IdentifierToken;
    if (identifier is null)
    {
        return false;
    }

    Next();
    return true;
}
/// <summary>
/// Consumes the current token and requires it to be an identifier.
/// </summary>
/// <returns>The consumed identifier token.</returns>
/// <exception cref="ParseException">The input ended or the token is not an identifier.</exception>
private IdentifierToken ExpectIdentifier()
{
    var token = ExpectToken();
    if (token is IdentifierToken identifier)
    {
        return identifier;
    }

    // The offending token has already been consumed at this point.
    throw new ParseException(Diagnostic
        .Error($"Expected identifier, but found {token.GetType().Name}")
        .WithHelp("Provide a valid identifier name here")
        .At(token)
        .Build());
}
/// <summary>
/// Consumes the current token if it is an integer literal.
/// </summary>
/// <param name="stringLiteral">
/// The consumed integer literal token (despite the parameter's name), or null when nothing was consumed.
/// </param>
/// <returns>True when an integer literal was consumed; otherwise false and the cursor is unchanged.</returns>
private bool TryExpectIntLiteral([NotNullWhen(true)] out IntLiteralToken? stringLiteral)
{
    stringLiteral = CurrentToken as IntLiteralToken;
    if (stringLiteral is null)
    {
        return false;
    }

    Next();
    return true;
}
/// <summary>
/// Consumes the current token and requires it to be a string literal.
/// </summary>
/// <returns>The consumed string literal token.</returns>
/// <exception cref="ParseException">The input ended or the token is not a string literal.</exception>
private StringLiteralToken ExpectStringLiteral()
{
    var token = ExpectToken();
    if (token is StringLiteralToken stringLiteral)
    {
        return stringLiteral;
    }

    // The offending token has already been consumed at this point.
    throw new ParseException(Diagnostic
        .Error($"Expected string literal, but found {token.GetType().Name}")
        .WithHelp("Provide a valid string literal")
        .At(token)
        .Build());
}
/// <summary>
/// Advances the cursor by one token. No bounds check is performed here.
/// </summary>
private void Next() => _tokenIndex++;
/// <summary>
/// Copies the tokens from <paramref name="tokenStartIndex"/> up to, but not including, the
/// current cursor position.
/// </summary>
/// <param name="tokenStartIndex">Index of the first token to include.</param>
/// <returns>A new list holding the tokens consumed since <paramref name="tokenStartIndex"/>.</returns>
private List<Token> GetTokens(int tokenStartIndex)
{
    var consumedCount = _tokenIndex - tokenStartIndex;

    return _tokens
        .Skip(tokenStartIndex)
        .Take(consumedCount)
        .ToList();
}
}
/// <summary>
/// Result of parsing one source file.
/// </summary>
/// <param name="Definitions">The top-level definitions that were parsed.</param>
/// <param name="ModuleName">The module name the parser read, as passed by the creator of this record.</param>
/// <param name="Imports">The imported module names.</param>
public record SyntaxTree(List<DefinitionSyntax> Definitions, string ModuleName, List<string> Imports);
/// <summary>
/// Exception that carries a parser <see cref="Diagnostic"/>. Its message is the diagnostic's message.
/// </summary>
/// <param name="diagnostic">The diagnostic describing the parse error.</param>
public class ParseException(Diagnostic diagnostic) : Exception(diagnostic.Message)
{
    /// <summary>The diagnostic describing the parse error.</summary>
    public Diagnostic Diagnostic { get; } = diagnostic;
}